replace sharpness edge detection with gradient

This commit is contained in:
Brent Schroeter 2026-01-15 21:31:24 +00:00
parent ce42ab58f1
commit 9a94719dc1

View file

@ -1,7 +1,7 @@
from sys import stdout
import numpy as np
from PIL import Image, ImageChops, ImageFilter
from PIL import Image
from .items import ArchiveDoc, ArchiveLeaf
from .ocr import OcrEngine, TextBlock
@ -178,11 +178,15 @@ def normalize_contrast_for_text(im: Image.Image) -> tuple[Image.Image, bool]:
), False
def analyze_sharpness(im: Image.Image) -> float:
    """
    Attempts to quantify the sharpness of an image, on a scale of 0 to 1.

    The score is the 99th percentile of the per-pixel gradient magnitude,
    divided by 255 and clipped to the range [0, 1].

    Params:
        im: Image to score. Anything `np.asarray` can turn into a 2-D array
            of intensities works. A 3-D (multi-channel) array is reduced to a
            single plane by averaging over its last axis. The division by 255
            assumes 8-bit intensities -- TODO confirm against callers.

    Returns:
        A float between 0 and 1; higher values indicate stronger gradients.
        Images with fewer than 2 pixels along either axis have no measurable
        gradient and score 0.0.
    """
    # Inferring sharpness by measuring the peak intensity of an edge detection/
    # high pass filter over the image tends to produce different baseline
    # results across documents. We've had much more luck with a direct gradient
    # computation based on https://stackoverflow.com/a/26014796.

    # Cast to float up front so that differences between neighbouring pixels
    # can never wrap around in unsigned integer arithmetic.
    pixels = np.asarray(im, dtype=np.float64)

    # Multi-channel input would make np.gradient return three arrays and break
    # the two-way unpacking below, so collapse it to one intensity plane.
    if pixels.ndim == 3:
        pixels = pixels.mean(axis=2)

    # np.gradient needs at least 2 samples along every axis.
    if min(pixels.shape) < 2:
        return 0.0

    grad_y, grad_x = np.gradient(pixels)
    magnitude = np.hypot(grad_x, grad_y)

    # The 99th percentile (rather than the max) keeps a handful of outlier
    # pixels from dominating the score.
    return float(np.clip(np.quantile(magnitude, 0.99) / 255, 0, 1))