replace sharpness edge detection with gradient

This commit is contained in:
Brent Schroeter 2026-01-15 21:31:24 +00:00
parent ce42ab58f1
commit 9a94719dc1

View file

@ -1,7 +1,7 @@
from sys import stdout from sys import stdout
import numpy as np import numpy as np
from PIL import Image, ImageChops, ImageFilter from PIL import Image
from .items import ArchiveDoc, ArchiveLeaf from .items import ArchiveDoc, ArchiveLeaf
from .ocr import OcrEngine, TextBlock from .ocr import OcrEngine, TextBlock
@ -178,11 +178,15 @@ def normalize_contrast_for_text(im: Image.Image) -> tuple[Image.Image, bool]:
), False ), False
def analyze_sharpness(im: Image.Image): def analyze_sharpness(im: Image.Image) -> float:
""" """
Crudely quantifies the "sharpness" of edges in an image, on a scale of 0 to Attempts to quantify the sharpness of an image, on a scale of 0 to 1.
1, by measuring peak intensity of a high-pass filter. """
# Inferring sharpness by measuring the peak intensity of an edge detection/
# high pass filter over the image tends to produce different baseline
# results across documents. We've had much more luck with a direct gradient
# computation based on https://stackoverflow.com/a/26014796.
grad_y, grad_x = np.gradient(np.asarray(im))
return float(np.clip(np.quantile(np.sqrt(grad_x**2 + grad_y**2), 0.99) / 255, 0, 1))
""" """
blurred = im.filter(ImageFilter.GaussianBlur(8))
diff = ImageChops.difference(im, blurred)
return np.quantile(diff, 0.999) / 255