# MicroQA/microqa/ocr/__init__.py

"""
This module contains interchangeable engines for optical character recognition,
making it easy to swap implementations in and out based on speed and accuracy
advantages without rewriting business logic.
"""

from dataclasses import dataclass
from typing import Optional

from PIL import Image


@dataclass
class TextBlock:
    """
    A piece of recognized text together with its bounding box.

    Attributes:

        x0      Left edge of the bounding box, in pixels from left of image.

        y0      Top edge of the bounding box, in pixels from top of image.

        x1      Right edge of the bounding box, in pixels from left of image.

        y1      Bottom edge of the bounding box, in pixels from top of image.

        text    Text content of the block.
    """

    x0: int
    y0: int
    x1: int
    y1: int
    text: str


@dataclass
class OcrResult:
    """
    OCR data parsed from a single page.

    Attributes:

        blocks      Blocks of text detected on a page.

        page_angle  Optional detected rotation of the page, in degrees clockwise
                    relative to upright. Defaults to None, so callers with no
                    angle to report may omit it.
    """

    blocks: list[TextBlock]
    # Defaulting to None keeps existing positional and keyword construction
    # working while no longer forcing every caller to spell out `None`.
    page_angle: Optional[float] = None


class OcrEngine:
    """
    Common base for interchangeable OCR processing backends. The base class
    does no recognition itself; `process()` here only raises
    NotImplementedError.

    Params:

        languages       List of ISO-639-3 language codes fed to the OCR backend.

        detect_angle    Allows page angle detection to be enabled or disabled
                        for certain implementations. Defaults to True.
    """

    _detect_angle: bool
    _languages: list[str]

    def __init__(self, languages: list[str], detect_angle: bool = True):
        """
        Store the engine configuration on the instance.
        """
        # Take a private copy so later changes to the caller's list do not
        # affect this engine.
        language_codes = languages.copy()
        self._detect_angle = detect_angle
        self._languages = language_codes

    def process(self, image: Image.Image) -> OcrResult:
        """
        Entry point for running OCR on an image. Always raises
        NotImplementedError in this base class.
        """
        raise NotImplementedError