replace sharpness edge detection with gradient

2026-01-15 21:31:24 +00:00 · 2026-01-15 21:31:24 +00:00 · 9a94719dc1
commit 9a94719dc1
parent ce42ab58f1
1 changed files with 11 additions and 7 deletions
--- a/microqa/engine.py
+++ b/microqa/engine.py
@@ -1,7 +1,7 @@
 from sys import stdout

 import numpy as np
-from PIL import Image, ImageChops, ImageFilter
+from PIL import Image

 from .items import ArchiveDoc, ArchiveLeaf
 from .ocr import OcrEngine, TextBlock
@@ -178,11 +178,15 @@ def normalize_contrast_for_text(im: Image.Image) -> tuple[Image.Image, bool]:
    ), False


-def analyze_sharpness(im: Image.Image):
+def analyze_sharpness(im: Image.Image) -> float:
    """
-    Crudely quantifies the "sharpness" of edges in an image, on a scale of 0 to
-    1, by measuring peak intensity of a high-pass filter.
+    Attempts to quantify the sharpness of an image, on a scale of 0 to 1.
+    """
+
+    # Inferring sharpness by measuring the peak intensity of an edge detection/
+    # high pass filter over the image tends to produce different baseline
+    # results across documents. We've had much more luck with a direct gradient
+    # computation based on https://stackoverflow.com/a/26014796.
+    grad_y, grad_x = np.gradient(np.asarray(im))
+    return float(np.clip(np.quantile(np.sqrt(grad_x**2 + grad_y**2), 0.99) / 255, 0, 1))
    """
-    blurred = im.filter(ImageFilter.GaussianBlur(8))
-    diff = ImageChops.difference(im, blurred)
-    return np.quantile(diff, 0.999) / 255