From 7dfaed52fff55830616b60170d0d42b791414b81 Mon Sep 17 00:00:00 2001
From: Daniel van Strien
Date: Mon, 23 Mar 2026 13:42:41 +0000
Subject: [PATCH] Add Qianfan-OCR and dots.mocr to model registry

Register two new OCR models available in uv-scripts/ocr:
- Qianfan-OCR (4.7B): #1 on OmniDocBench v1.5, 192 languages
- dots.mocr (3B): upgraded dots.ocr with layout/SVG/8 prompt modes

Both are opt-in via --models flag; DEFAULT_MODELS unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/ocr_bench/run.py | 12 ++++++++++++
 tests/test_run.py    |  4 ++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/ocr_bench/run.py b/src/ocr_bench/run.py
index 937156e..a009a9b 100644
--- a/src/ocr_bench/run.py
+++ b/src/ocr_bench/run.py
@@ -54,6 +54,18 @@ class ModelConfig:
         size="2.1B",
         default_flavor="l4x1",
     ),
+    "qianfan-ocr": ModelConfig(
+        script="https://huggingface.co/datasets/uv-scripts/ocr/raw/main/qianfan-ocr.py",
+        model_id="baidu/Qianfan-OCR",
+        size="4.7B",
+        default_flavor="l4x1",
+    ),
+    "dots-mocr": ModelConfig(
+        script="https://huggingface.co/datasets/uv-scripts/ocr/raw/main/dots-mocr.py",
+        model_id="rednote-hilab/dots.mocr",
+        size="3B",
+        default_flavor="l4x1",
+    ),
 }
 
 DEFAULT_MODELS = ["glm-ocr", "deepseek-ocr", "lighton-ocr-2", "dots-ocr", "firered-ocr"]
diff --git a/tests/test_run.py b/tests/test_run.py
index 6d31159..1a4fae5 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -37,8 +37,8 @@ def test_custom_args(self):
 
 
 class TestModelRegistry:
-    def test_has_five_core_models(self):
-        assert len(MODEL_REGISTRY) == 5
+    def test_has_core_models(self):
+        assert len(MODEL_REGISTRY) == 7
 
     def test_default_models_exist_in_registry(self):
         for slug in DEFAULT_MODELS: