2025-11-07 05:41:18 +00:00
|
|
|
"""
|
|
|
|
|
This module contains interchangeable engines for optical character recognition,
|
|
|
|
|
making it easy to swap implementations in and out based on speed and accuracy
|
|
|
|
|
advantages without rewriting business logic.
|
|
|
|
|
"""
|
2025-12-20 02:16:41 +00:00
|
|
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
from typing import Optional
|
|
|
|
|
|
|
|
|
|
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class TextBlock:
    """
    A block of text together with its bounding box.

    Attributes:
        x0    Left coordinate of the bounding box, in pixels.

        y0    Top coordinate of the bounding box, in pixels.

        x1    Right coordinate of the bounding box, in pixels from left of
              image.

        y1    Bottom coordinate of the bounding box, in pixels from top of
              image.

        text  Text content of the block.
    """

    x0: int
    y0: int
    x1: int
    y1: int
    text: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class OcrResult:
    """
    OCR data parsed from a single page.

    Attributes:
        blocks      Blocks of text detected on a page.

        page_angle  Optional detected rotation of the page, in degrees clockwise
                    relative to upright. May be None; the field has no default
                    and must always be supplied explicitly.
    """

    blocks: list[TextBlock]
    page_angle: Optional[float]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OcrEngine:
    """
    Abstract class for interchangeable OCR processing backends.

    The base `process` always raises NotImplementedError; concrete backends
    are expected to override it.

    Params:
        detect_angle  Allows page angle detection to be enabled or disabled
                      for certain implementations. Defaults to True.

        languages     List of ISO-639-3 language codes fed to the OCR backend.
                      The codes are copied into a new list, so later changes
                      to the caller's collection do not affect the engine.
                      Any non-string iterable of codes (e.g. a tuple) is
                      accepted at runtime.

    Raises:
        TypeError  If `languages` is a single string instead of a collection
                   of language codes.
    """

    _detect_angle: bool
    _languages: list[str]

    def __init__(self, languages: list[str], detect_angle: bool = True):
        # A bare string is iterable, so list() would silently split it into
        # single characters ("eng" -> ["e", "n", "g"]); fail loudly instead.
        if isinstance(languages, str):
            raise TypeError(
                "languages must be a collection of language codes, "
                "not a single string"
            )

        self._detect_angle = detect_angle
        # list() makes the same defensive copy as .copy() did, but also works
        # for tuples and other iterables that have no .copy() method.
        self._languages = list(languages)

    def process(self, image: Image.Image) -> OcrResult:
        """
        Process a page image and return its OCR result.

        This base implementation does no work and always raises
        NotImplementedError; subclasses provide the actual behavior.
        """
        raise NotImplementedError()
|