From d48b672e1bd0e9f30763db6e0bf1e9f32ef870f6 Mon Sep 17 00:00:00 2001 From: Brent Schroeter Date: Sat, 20 Dec 2025 08:58:49 +0000 Subject: [PATCH] improve contrast norm and sharpness measurement --- diagnostics.py | 2 +- microqa/engine.py | 274 ++++++++++++++++++--------------------- microqa/items.py | 4 +- microqa/ocr/tesseract.py | 17 --- pyproject.toml | 2 - uv.lock | 174 +------------------------ 6 files changed, 133 insertions(+), 340 deletions(-) diff --git a/diagnostics.py b/diagnostics.py index 493961e..c6171ad 100644 --- a/diagnostics.py +++ b/diagnostics.py @@ -40,7 +40,7 @@ def main(): t_start = time() minimal_docs = ( - [doc for doc in item.docs if doc.name != ""] + [doc for doc in item.docs if doc.name != doc.identifier] if len(item.docs) > 1 else item.docs ) diff --git a/microqa/engine.py b/microqa/engine.py index d4abb06..28ec802 100644 --- a/microqa/engine.py +++ b/microqa/engine.py @@ -1,7 +1,7 @@ from sys import stdout import numpy as np -from PIL import Image, ImageFilter +from PIL import Image, ImageChops, ImageFilter from .items import ArchiveDoc, ArchiveLeaf from .ocr import OcrEngine, TextBlock @@ -30,139 +30,100 @@ def analyze_doc( analyzed_pages = [] for leaf in all_leaves: - im_cropped = leaf.image.crop( + im, is_blank = normalize_contrast_for_text(leaf.image) + + im_cropped = im.crop( ( - leaf.image.size[0] * 0.1, - leaf.image.size[1] * 0.1, - leaf.image.size[0] * 0.9, - leaf.image.size[1] * 0.9, + im.size[0] * 0.1, + im.size[1] * 0.1, + im.size[0] * 0.9, + im.size[1] * 0.9, ) ) + sharpness = analyze_sharpness(im_cropped) - is_blank = im_cropped.getextrema()[0] > 255 * 0.8 - - if is_blank: - max_sharpness = 1 - text_margin_px = -1 + # OCR is computationally expensive, so we try to take advantage of + # the Tesseract data already parsed by the Internet Archive and + # embedded in the PDF, when possible. 
If there is not sufficient + # text in the PDF to be confident that the Archive's OCR + # postprocessing captured it all, then OCR is recomputed locally. + # + # In some instances, the Archive's OCR detects rotated text but + # parses it as gibberish. To partially mitigate this, we ignore all + # precomputed text blocks with a "portrait" aspect ratio. This will + # not necessarily help with text that is rotated 180 degrees, but in + # practice that case is rarely encountered. This will also not work + # well with non-latin scripts that are intended to be oriented + # vertically. + OCR_RECOMPUTE_THRESHOLD_WORDS = 30 + if ( + sum( + ( + len(block.text.split()) + for block in leaf.text_blocks + if block.x1 - block.x0 > block.y1 - block.y0 + ) + ) + >= OCR_RECOMPUTE_THRESHOLD_WORDS + ): + ocred_leaf = leaf page_angle = 0 else: - # Sharpness is determined by percentile of pixels that match some - # criteria, so it may vary significantly depending on which portion - # of the image is analyzed. In an effort to identify the sharpest - # edges, we split up the image into chunks and assume that the - # highest sharpness value obtained across all chunks is - # representative of the image as a whole. - max_sharpness = 0.0 - if im_cropped.size[0] < im_cropped.size[1]: - # Page is in portrait orientation. - segments_x = 2 - segments_y = 3 - else: - # Page is in landscape orientation. 
- segments_x = 3 - segments_y = 2 - for i in range(segments_x): - for j in range(segments_y): - max_sharpness = max( - max_sharpness, - analyze_sharpness( - im_cropped.crop( - ( - im_cropped.size[0] / segments_x * i, - im_cropped.size[1] / segments_y * j, - im_cropped.size[0] / segments_x * (i + 1), - im_cropped.size[1] / segments_y * (j + 1), - ) - ) - ), + OCR_SCALE = 1 + im_scaled = im.resize(np.int_(np.array(im.size) * OCR_SCALE)) + ocr_result = ocr_engine.process(im_scaled) + ocred_leaf = ArchiveLeaf( + image=im, + page_number=leaf.page_number, + text_blocks=[ + TextBlock( + x0=int(block.x0 / OCR_SCALE), + y0=int(block.y0 / OCR_SCALE), + x1=int(block.x1 / OCR_SCALE), + y1=int(block.y1 / OCR_SCALE), + text=block.text, ) + for block in ocr_result.blocks + ], + ) + page_angle = ocr_result.page_angle - # OCR is computationally expensive, so we try to take advantage of - # the Tesseract data already parsed by the Internet Archive and - # embedded in the PDF, when possible. If there is not sufficient - # text in the PDF to be confident that the Archive's OCR - # postprocessing captured it all, then OCR is recomputed locally. - # - # In some instances, the Archive's OCR detects rotated text but - # parses it as gibberish. To partially mitigate this, we ignore all - # precomputed text blocks with a "portrait" aspect ratio. This will - # not necessarily help with text that is rotated 180 degrees, but in - # practice that case is rarely encountered. This will also not work - # well with non-latin scripts that are intended to be oriented - # vertically. 
- OCR_RECOMPUTE_THRESHOLD_WORDS = 30 - if ( - sum( - ( - len(block.text.split()) - for block in leaf.text_blocks - if block.x1 - block.x0 > block.y1 - block.y0 - ) - ) - >= OCR_RECOMPUTE_THRESHOLD_WORDS - ): - if verbose: - print("Using PDF text.") - ocred_leaf = leaf - page_angle = 0 - else: - if verbose: - print("Using OCR.") - OCR_SCALE = 1 - im_scaled = leaf.image.resize( - np.int_(np.array(leaf.image.size) * OCR_SCALE) - ) - ocr_result = ocr_engine.process(im_scaled) - ocred_leaf = ArchiveLeaf( - image=leaf.image, - page_number=leaf.page_number, - text_blocks=[ - TextBlock( - x0=int(block.x0 / OCR_SCALE), - y0=int(block.y0 / OCR_SCALE), - x1=int(block.x1 / OCR_SCALE), - y1=int(block.y1 / OCR_SCALE), - text=block.text, + word_margins_all_directions = ( + np.sort( + np.concat( + [ + np.array( + [ + block.x0, + block.y0, + im.size[0] - block.x1, + im.size[1] - block.y1, + ] ) - for block in ocr_result.blocks - ], - ) - page_angle = ocr_result.page_angle - - word_margins_all_directions = np.sort( - np.int_( - np.concat( - [ - np.array( - [ - block.x0, - block.y0, - leaf.image.size[0] - block.x1, - leaf.image.size[1] - block.y1, - ] - ) - for block in ocred_leaf.text_blocks - ] - ) - ) - ) - # Skip the n closest words to the edge, to help ignore stray OCR artifacts. - SKIP_WORDS = 2 - text_margin_px = int( - word_margins_all_directions[SKIP_WORDS] - if word_margins_all_directions.shape[0] > SKIP_WORDS - else -1 + for block in ocred_leaf.text_blocks + ] + ).astype(np.int_) ) + if len(ocred_leaf.text_blocks) > 0 + else np.array([]) + ) + # Skip the n closest words to the edge, to help ignore stray OCR artifacts. + SKIP_WORDS = 2 + text_margin_px = int( + word_margins_all_directions[SKIP_WORDS] + if word_margins_all_directions.shape[0] > SKIP_WORDS + else -1 + ) # Make sure the OCR engine is running with orientation detection. 
assert page_angle is not None analyzed_pages.append( { - "blank": is_blank, + "is_blank": is_blank, "page_angle": page_angle, - "size_analyzed": leaf.image.size, - "sharpness": max_sharpness, + "size_analyzed": im.size, + "sharpness": sharpness, "text_margin_px": text_margin_px, } ) @@ -170,35 +131,58 @@ def analyze_doc( return {"pages": analyzed_pages} +def normalize_contrast_for_text(im: Image.Image) -> tuple[Image.Image, bool]: + """ + Most of the pages being analyzed, and virtually all of the pages we care + about for the purposes of QA, primarily contain text on a contrasting + background. We can therefore typically assume that it is reasonable to boost + contrast so that the lightest pixels are nearly white and the darkest pixels + are nearly black. This can help make analysis more consistent across leaves + with varying contrast ratios due to varied scanner settings, contrast ratios + of the original documents, or weathering/fading of the physical fiche. + + Processed leaves usually contain some amount of margin around the edges + where the backlight of the scanning rig is visible through the unexposed + region of the negative, so contrast detection is heavily center-weighted. + + Params: + + im Scan image as a PIL `Image` object. (It is converted to a numpy + array internally via `np.asarray()` for contrast detection.) + + Returns: + + (normalized_image, is_blank) + """ + pixel_values = np.asarray( + im.crop( + ( + im.size[0] * 0.1, + im.size[1] * 0.1, + im.size[0] * 0.9, + im.size[1] * 0.9, + ) + ) + ) + # To avoid extreme outliers, use quantiles instead of absolute extrema. + extrema = (np.quantile(pixel_values, 0.002), np.quantile(pixel_values, 0.998)) + if extrema[1] - extrema[0] < 64: + # Assume there is essentially no content here and return the original. + return im, True + + # Apply a rudimentary tone curve to the image, with the goal that the + # extrema we just calculated will evaluate to values "pretty close to" 0% + # and 100% of the available range. 
+ return im.point( + lambda x: np.interp(x, (0, extrema[0], extrema[1], 255), (0, 8, 247, 255)) + ), False + + def analyze_sharpness(im: Image.Image): """ Crudely quantifies the "sharpness" of edges in an image, on a scale of 0 to - 1. The scale is not linear with respect to scan quality: anything above 0.1 - is usually fine. + 1, by measuring peak intensity of a high-pass filter. """ - arr = np.asarray(im) - - # Normalize contrast based on brightest and darkest pixels. For example, - # NORM_QUANTILE=0.1 will attempt to transform pixel values so that 80% fall - # between 10% brightness and 90% brightness. In practice, a value around - # 0.02 seems to work fairly well. - NORM_QUANTILE = 0.03 - pixel_range = np.quantile(arr, 1.0 - NORM_QUANTILE) - np.quantile( - arr, NORM_QUANTILE - ) - if pixel_range == 0: - arr_normalized = arr - else: - arr_normalized = arr * (1.0 - NORM_QUANTILE * 2) / pixel_range - arr_normalized = ( - arr_normalized - np.quantile(arr_normalized, NORM_QUANTILE) + NORM_QUANTILE - ) - arr_normalized = np.uint8(np.clip(arr_normalized, 0, 1) * 255) - - # "Sharpness" is determined by measuring the median intensity of pixels - # near edges, after an edge detection filter has been applied to the image. - edges_arr = np.asarray( - Image.fromarray(arr_normalized).filter(ImageFilter.FIND_EDGES) - ) - EDGE_THRESHOLD = 8 - return np.median(edges_arr[edges_arr > EDGE_THRESHOLD]) / 255 + blurred = im.filter(ImageFilter.GaussianBlur(8)) + diff = ImageChops.difference(im, blurred) + return np.quantile(diff, 0.999) / 255 diff --git a/microqa/items.py b/microqa/items.py index 8cc7f55..de8109f 100644 --- a/microqa/items.py +++ b/microqa/items.py @@ -66,8 +66,8 @@ class ArchiveDoc: identifier archive.org identifier string, for example `"micro_IA40386007_0012"`. - name Document name, with the item identifier, leading whitespace, - and file extension stripped. + name Document name, with the item identifier intact but file + extension stripped. 
title Optional `title` metadata field assigned to the `_jp2.zip` file, usually indicating that this file represents a subset diff --git a/microqa/ocr/tesseract.py b/microqa/ocr/tesseract.py index 6839c3e..f3b8a45 100644 --- a/microqa/ocr/tesseract.py +++ b/microqa/ocr/tesseract.py @@ -47,23 +47,6 @@ class TesseractOcrEngine(OcrEngine): # TODO: Will this work for non-Latin scripts? Probably not all. df = df[(df["width"] / df["height"]) > 0.8] - print( - [ - TextBlock( - # Rotate X and Y coordinates back to match the original image. - *_box_after_rotation( - int(row["left"]), - int(row["top"]), - int(row["left"] + row["width"]), - int(row["top"] + row["height"]), - *rotated_image.size, - angle, - ), - text=row["text"], - ) - for _, row in df.iterrows() - ] - ) if angle_best is None or df.shape[0] > len(blocks_best): angle_best = angle blocks_best = [ diff --git a/pyproject.toml b/pyproject.toml index 5b38b9e..c8396da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,5 @@ dependencies = [ [dependency-groups] dev = [ - "jedi>=0.19.2", - "python-lsp-server>=1.13.0", "ruff>=0.12.8", ] diff --git a/uv.lock b/uv.lock index 472ff41..1ee47b6 100644 --- a/uv.lock +++ b/uv.lock @@ -62,43 +62,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/eb/f3/b39e98f6a7eeb8d848cf1cc74766c4783f3d1d6afe50ff7726a2df51f681/bce_python_sdk-0.9.56-py3-none-any.whl", hash = "sha256:432cf3ea4cd4b959dd0185f36ed5c49304a6272a08a17e5f443730b1f437135b", size = 393233, upload-time = "2025-12-16T11:25:22.957Z" }, ] -[[package]] -name = "black" -version = "25.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "pytokens" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c4/d9/07b458a3f1c525ac392b5edc6b191ff140b596f9d77092429417a54e249d/black-25.12.0.tar.gz", hash = 
"sha256:8d3dd9cea14bff7ddc0eb243c811cdb1a011ebb4800a5f0335a01a68654796a7", size = 659264, upload-time = "2025-12-08T01:40:52.501Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/ad/7ac0d0e1e0612788dbc48e62aef8a8e8feffac7eb3d787db4e43b8462fa8/black-25.12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d0cfa263e85caea2cff57d8f917f9f51adae8e20b610e2b23de35b5b11ce691a", size = 1877003, upload-time = "2025-12-08T01:43:29.967Z" }, - { url = "https://files.pythonhosted.org/packages/e8/dd/a237e9f565f3617a88b49284b59cbca2a4f56ebe68676c1aad0ce36a54a7/black-25.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1a2f578ae20c19c50a382286ba78bfbeafdf788579b053d8e4980afb079ab9be", size = 1712639, upload-time = "2025-12-08T01:52:46.756Z" }, - { url = "https://files.pythonhosted.org/packages/12/80/e187079df1ea4c12a0c63282ddd8b81d5107db6d642f7d7b75a6bcd6fc21/black-25.12.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e1b65634b0e471d07ff86ec338819e2ef860689859ef4501ab7ac290431f9b", size = 1758143, upload-time = "2025-12-08T01:45:29.137Z" }, - { url = "https://files.pythonhosted.org/packages/93/b5/3096ccee4f29dc2c3aac57274326c4d2d929a77e629f695f544e159bfae4/black-25.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:a3fa71e3b8dd9f7c6ac4d818345237dfb4175ed3bf37cd5a581dbc4c034f1ec5", size = 1420698, upload-time = "2025-12-08T01:45:53.379Z" }, - { url = "https://files.pythonhosted.org/packages/7e/39/f81c0ffbc25ffbe61c7d0385bf277e62ffc3e52f5ee668d7369d9854fadf/black-25.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:51e267458f7e650afed8445dc7edb3187143003d52a1b710c7321aef22aa9655", size = 1229317, upload-time = "2025-12-08T01:46:35.606Z" }, - { url = "https://files.pythonhosted.org/packages/d1/bd/26083f805115db17fda9877b3c7321d08c647df39d0df4c4ca8f8450593e/black-25.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31f96b7c98c1ddaeb07dc0f56c652e25bdedaac76d5b68a059d998b57c55594a", size = 1924178, 
upload-time = "2025-12-08T01:49:51.048Z" }, - { url = "https://files.pythonhosted.org/packages/89/6b/ea00d6651561e2bdd9231c4177f4f2ae19cc13a0b0574f47602a7519b6ca/black-25.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:05dd459a19e218078a1f98178c13f861fe6a9a5f88fc969ca4d9b49eb1809783", size = 1742643, upload-time = "2025-12-08T01:49:59.09Z" }, - { url = "https://files.pythonhosted.org/packages/6d/f3/360fa4182e36e9875fabcf3a9717db9d27a8d11870f21cff97725c54f35b/black-25.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1f68c5eff61f226934be6b5b80296cf6939e5d2f0c2f7d543ea08b204bfaf59", size = 1800158, upload-time = "2025-12-08T01:44:27.301Z" }, - { url = "https://files.pythonhosted.org/packages/f8/08/2c64830cb6616278067e040acca21d4f79727b23077633953081c9445d61/black-25.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:274f940c147ddab4442d316b27f9e332ca586d39c85ecf59ebdea82cc9ee8892", size = 1426197, upload-time = "2025-12-08T01:45:51.198Z" }, - { url = "https://files.pythonhosted.org/packages/d4/60/a93f55fd9b9816b7432cf6842f0e3000fdd5b7869492a04b9011a133ee37/black-25.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:169506ba91ef21e2e0591563deda7f00030cb466e747c4b09cb0a9dae5db2f43", size = 1237266, upload-time = "2025-12-08T01:45:10.556Z" }, - { url = "https://files.pythonhosted.org/packages/c8/52/c551e36bc95495d2aa1a37d50566267aa47608c81a53f91daa809e03293f/black-25.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a05ddeb656534c3e27a05a29196c962877c83fa5503db89e68857d1161ad08a5", size = 1923809, upload-time = "2025-12-08T01:46:55.126Z" }, - { url = "https://files.pythonhosted.org/packages/a0/f7/aac9b014140ee56d247e707af8db0aae2e9efc28d4a8aba92d0abd7ae9d1/black-25.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9ec77439ef3e34896995503865a85732c94396edcc739f302c5673a2315e1e7f", size = 1742384, upload-time = "2025-12-08T01:49:37.022Z" }, - { url = 
"https://files.pythonhosted.org/packages/74/98/38aaa018b2ab06a863974c12b14a6266badc192b20603a81b738c47e902e/black-25.12.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e509c858adf63aa61d908061b52e580c40eae0dfa72415fa47ac01b12e29baf", size = 1798761, upload-time = "2025-12-08T01:46:05.386Z" }, - { url = "https://files.pythonhosted.org/packages/16/3a/a8ac542125f61574a3f015b521ca83b47321ed19bb63fe6d7560f348bfe1/black-25.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:252678f07f5bac4ff0d0e9b261fbb029fa530cfa206d0a636a34ab445ef8ca9d", size = 1429180, upload-time = "2025-12-08T01:45:34.903Z" }, - { url = "https://files.pythonhosted.org/packages/e6/2d/bdc466a3db9145e946762d52cd55b1385509d9f9004fec1c97bdc8debbfb/black-25.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:bc5b1c09fe3c931ddd20ee548511c64ebf964ada7e6f0763d443947fd1c603ce", size = 1239350, upload-time = "2025-12-08T01:46:09.458Z" }, - { url = "https://files.pythonhosted.org/packages/35/46/1d8f2542210c502e2ae1060b2e09e47af6a5e5963cb78e22ec1a11170b28/black-25.12.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:0a0953b134f9335c2434864a643c842c44fba562155c738a2a37a4d61f00cad5", size = 1917015, upload-time = "2025-12-08T01:53:27.987Z" }, - { url = "https://files.pythonhosted.org/packages/41/37/68accadf977672beb8e2c64e080f568c74159c1aaa6414b4cd2aef2d7906/black-25.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2355bbb6c3b76062870942d8cc450d4f8ac71f9c93c40122762c8784df49543f", size = 1741830, upload-time = "2025-12-08T01:54:36.861Z" }, - { url = "https://files.pythonhosted.org/packages/ac/76/03608a9d8f0faad47a3af3a3c8c53af3367f6c0dd2d23a84710456c7ac56/black-25.12.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9678bd991cc793e81d19aeeae57966ee02909877cb65838ccffef24c3ebac08f", size = 1791450, upload-time = "2025-12-08T01:44:52.581Z" }, - { url = 
"https://files.pythonhosted.org/packages/06/99/b2a4bd7dfaea7964974f947e1c76d6886d65fe5d24f687df2d85406b2609/black-25.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:97596189949a8aad13ad12fcbb4ae89330039b96ad6742e6f6b45e75ad5cfd83", size = 1452042, upload-time = "2025-12-08T01:46:13.188Z" }, - { url = "https://files.pythonhosted.org/packages/b2/7c/d9825de75ae5dd7795d007681b752275ea85a1c5d83269b4b9c754c2aaab/black-25.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:778285d9ea197f34704e3791ea9404cd6d07595745907dd2ce3da7a13627b29b", size = 1267446, upload-time = "2025-12-08T01:46:14.497Z" }, - { url = "https://files.pythonhosted.org/packages/68/11/21331aed19145a952ad28fca2756a1433ee9308079bd03bd898e903a2e53/black-25.12.0-py3-none-any.whl", hash = "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", size = 206191, upload-time = "2025-12-08T01:40:50.963Z" }, -] - [[package]] name = "certifi" version = "2025.11.12" @@ -223,19 +186,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/c1/e419ef3723a074172b68aaa89c9f3de486ed4c2399e2dbd8113a4fdcaf9e/colorlog-6.10.1-py3-none-any.whl", hash = "sha256:2d7e8348291948af66122cff006c9f8da6255d224e7cf8e37d8de2df3bad8c9c", size = 11743, upload-time = "2025-10-16T16:14:10.512Z" }, ] -[[package]] -name = "docstring-to-markdown" -version = "0.17" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "importlib-metadata" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/52/d8/8abe80d62c5dce1075578031bcfde07e735bcf0afe2886dd48b470162ab4/docstring_to_markdown-0.17.tar.gz", hash = "sha256:df72a112294c7492487c9da2451cae0faeee06e86008245c188c5761c9590ca3", size = 32260, upload-time = "2025-05-02T15:09:07.932Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/7b/af3d0da15bed3a8665419bb3a630585756920f4ad67abfdfef26240ebcc0/docstring_to_markdown-0.17-py3-none-any.whl", hash = 
"sha256:fd7d5094aa83943bf5f9e1a13701866b7c452eac19765380dead666e36d3711c", size = 23479, upload-time = "2025-05-02T15:09:06.676Z" }, -] - [[package]] name = "filelock" version = "3.20.1" @@ -368,30 +318,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, ] -[[package]] -name = "importlib-metadata" -version = "8.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "zipp" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, -] - -[[package]] -name = "jedi" -version = "0.19.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "parso" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = 
"2024-11-11T01:41:40.175Z" }, -] - [[package]] name = "microqa" version = "0.1.0" @@ -410,8 +336,6 @@ dependencies = [ [package.dev-dependencies] dev = [ - { name = "jedi" }, - { name = "python-lsp-server" }, { name = "ruff" }, ] @@ -429,11 +353,7 @@ requires-dist = [ ] [package.metadata.requires-dev] -dev = [ - { name = "jedi", specifier = ">=0.19.2" }, - { name = "python-lsp-server", specifier = ">=1.13.0" }, - { name = "ruff", specifier = ">=0.12.8" }, -] +dev = [{ name = "ruff", specifier = ">=0.12.8" }] [[package]] name = "modelscope" @@ -451,15 +371,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/05/63f01821681b2be5d1739b4aad7b186c28d4ead2c5c99a9fc4aa53c13c19/modelscope-1.33.0-py3-none-any.whl", hash = "sha256:d9bdd566303f813d762e133410007eaf1b78f065c871228ab38640919b707489", size = 6050040, upload-time = "2025-12-10T03:49:58.428Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = "networkx" version = "3.6.1" @@ -725,24 +636,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" }, ] -[[package]] 
-name = "parso" -version = "0.8.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/de/53e0bcf53d13e005bd8c92e7855142494f41171b34c2536b86187474184d/parso-0.8.5.tar.gz", hash = "sha256:034d7354a9a018bdce352f48b2a8a450f05e9d6ee85db84764e9b6bd96dafe5a", size = 401205, upload-time = "2025-08-23T15:15:28.028Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl", hash = "sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887", size = 106668, upload-time = "2025-08-23T15:15:25.663Z" }, -] - -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, -] - [[package]] name = "pillow" version = "12.0.0" @@ -830,24 +723,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, ] -[[package]] -name = "platformdirs" -version = "4.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, -] - -[[package]] -name = "pluggy" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, -] - [[package]] name = "prettytable" version = "3.17.0" @@ -1297,44 +1172,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] -[[package]] -name = "python-lsp-jsonrpc" -version = "1.1.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ujson" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/48/b6/fd92e2ea4635d88966bb42c20198df1a981340f07843b5e3c6694ba3557b/python-lsp-jsonrpc-1.1.2.tar.gz", hash = 
"sha256:4688e453eef55cd952bff762c705cedefa12055c0aec17a06f595bcc002cc912", size = 15298, upload-time = "2023-09-23T17:48:30.451Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/d9/656659d5b5d5f402b2b174cd0ba9bc827e07ce3c0bf88da65424baf64af8/python_lsp_jsonrpc-1.1.2-py3-none-any.whl", hash = "sha256:7339c2e9630ae98903fdaea1ace8c47fba0484983794d6aafd0bd8989be2b03c", size = 8805, upload-time = "2023-09-23T17:48:28.804Z" }, -] - -[[package]] -name = "python-lsp-server" -version = "1.14.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "black" }, - { name = "docstring-to-markdown" }, - { name = "jedi" }, - { name = "pluggy" }, - { name = "python-lsp-jsonrpc" }, - { name = "ujson" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b4/b5/b989d41c63390dfc2bf63275ab543b82fed076723d912055e77ccbae1422/python_lsp_server-1.14.0.tar.gz", hash = "sha256:509c445fc667f41ffd3191cb7512a497bf7dd76c14ceb1ee2f6c13ebe71f9a6b", size = 121536, upload-time = "2025-12-06T16:12:20.86Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/cf/587f913335e3855e0ddca2aee7c3f9d5de2d75a1e23434891e9f74783bcd/python_lsp_server-1.14.0-py3-none-any.whl", hash = "sha256:a71a917464effc48f4c70363f90b8520e5e3ba8201428da80b97a7ceb259e32a", size = 77060, upload-time = "2025-12-06T16:12:19.46Z" }, -] - -[[package]] -name = "pytokens" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/8d/a762be14dae1c3bf280202ba3172020b2b0b4c537f94427435f19c413b72/pytokens-0.3.0.tar.gz", hash = "sha256:2f932b14ed08de5fcf0b391ace2642f858f1394c0857202959000b68ed7a458a", size = 17644, upload-time = "2025-11-05T13:36:35.34Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/25/d9db8be44e205a124f6c98bc0324b2bb149b7431c53877fc6d1038dddaf5/pytokens-0.3.0-py3-none-any.whl", hash = "sha256:95b2b5eaf832e469d141a378872480ede3f251a5a5041b8ec6e581d3ac71bbf3", size 
= 12195, upload-time = "2025-11-05T13:36:33.183Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -1729,12 +1566,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/24/30/6b0809f4510673dc7 wheels = [ { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286, upload-time = "2025-09-22T16:29:51.641Z" }, ] - -[[package]] -name = "zipp" -version = "3.23.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, -]