MicroQA/microqa/ocr/__init__.py
2025-12-20 02:17:03 +00:00

75 lines
1.7 KiB
Python

"""
This module contains interchangeable engines for optical character recognition,
making it easy to swap implementations in and out based on speed and accuracy
advantages without rewriting business logic.
"""
from dataclasses import dataclass
from typing import Optional
from PIL import Image
@dataclass
class TextBlock:
    """
    A bounding box together with the text recognized inside it.

    Attributes:
        x0      Left coordinate of the bounding box, in pixels.
        y0      Top coordinate of the bounding box, in pixels.
        x1      Right coordinate of the bounding box, in pixels from left of
                image.
        y1      Bottom coordinate of the bounding box, in pixels from top of
                image.
        text    Text content of the block.
    """

    x0: int
    y0: int
    x1: int
    y1: int
    text: str
@dataclass
class OcrResult:
    """
    OCR data parsed from a single page.

    Attributes:
        blocks      Blocks of text detected on a page.
        page_angle  Optional detected rotation of the page, in degrees
                    clockwise relative to upright. May be None.
    """

    blocks: list[TextBlock]
    page_angle: Optional[float]
class OcrEngine:
    """
    Abstract class for interchangeable OCR processing backends.

    Concrete backends are expected to override `process()`; the version
    defined here only raises NotImplementedError.

    Params:
        detect_angle    Allows page angle detection to be enabled or disabled
                        for certain implementations. Defaults to True.
        languages       List of ISO-639-3 language codes fed to the OCR
                        backend.
    """

    _detect_angle: bool
    _languages: list[str]

    def __init__(self, languages: list[str], detect_angle: bool = True):
        self._detect_angle = detect_angle
        # Keep a private copy so that later mutation of the caller's list
        # does not change this engine's configuration.
        self._languages = languages.copy()

    def process(self, image: Image.Image) -> OcrResult:
        """
        Process `image` and return the resulting OcrResult.

        Raises:
            NotImplementedError: always, in this base class; subclasses must
                provide the actual implementation.
        """
        # Name the concrete class so a missing override is easy to track down.
        raise NotImplementedError(
            f"{type(self).__name__} does not implement process()"
        )