Merged
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1 +1 @@
* @iamarunbrahma
* @iamarunbrahma
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yml
@@ -43,4 +43,4 @@ body:
id: expected
attributes:
label: Expected Behavior
description: What did you expect to happen?
description: What did you expect to happen?
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/feature_request.yml
@@ -36,4 +36,4 @@ body:
- label: This feature would be useful to other users
required: true
- label: I'm willing to help implement this feature
required: false
required: false
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -11,4 +11,4 @@ In short, provide a summary of what this PR does and why. Usually, the relevant
- [ ] Ran make lint and make format to handle lint / formatting issues.
- [ ] Ran make test to run relevant tests scripts.
- [ ] Read the [contributor guidelines](https://github.com/iamarunbrahma/vision-parse/blob/main/CONTRIBUTING.md).
- [ ] Wrote necessary unit or integration tests.
- [ ] Wrote necessary unit or integration tests.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -36,4 +36,4 @@ jobs:
run: |
source .venv/bin/activate
ruff check .
black . --check
black . --check
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yml
@@ -34,4 +34,4 @@ jobs:
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{matrix.language}}"
category: "/language:${{matrix.language}}"
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -48,4 +48,4 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: |
source .venv/bin/activate
twine upload dist/*
twine upload dist/*
20 changes: 10 additions & 10 deletions .gitignore
@@ -1,17 +1,17 @@
# Environment variables
.env
.env.*
.DS_Store

# Python-generated files
__pycache__/
*.egg-info
*.py[oc]
__pycache__/
.pytest_cache/
.ruff_cache/
build/
dist/
wheels/
*.egg-info
.ruff_cache/
.pytest_cache/
.DS_Store

# Environment variables
.env
.env.*

# Virtual environments
.venv
.venv
33 changes: 33 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,33 @@
repos:
- repo: https://github.com/ambv/black
rev: 23.3.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
- id: check-json
exclude: '(^.vscode/)'
- id: pretty-format-json
exclude: '(^.vscode/)'
args:
- --autofix
- --indent
- '2'
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/pycqa/isort
rev: 5.11.5
hooks:
- id: isort
args: ['--profile', 'black']
- repo: local
hooks:
- id: flake8
name: flake8
entry: uv run flake8 src
language: system
pass_filenames: false
always_run: true
args: ['--ignore=E203,E501,W503']
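Once this config is in place, the hooks above run automatically on every `git commit`. A minimal setup sketch (assumes `pre-commit` is installed via pip; the local flake8 hook additionally requires `uv` on the PATH):

```bash
pip install pre-commit
pre-commit install              # register the git hook in .git/hooks
pre-commit run --all-files      # run every hook against the whole repo once
```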
2 changes: 1 addition & 1 deletion CITATION.cff
@@ -5,4 +5,4 @@ authors:
given-names: "Arun"
title: "Vision-Parse: Parse PDFs into markdown using Vision LLMs"
date-released: 2024-12-31
url: "https://github.com/iamarunbrahma/vision-parse"
url: "https://github.com/iamarunbrahma/vision-parse"
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -75,4 +75,4 @@ By participating in this project, you agree to abide by our Code of Conduct:
2. **Create Pull Request**
- Go to your fork on GitHub and click "New Pull Request"
- Select your feature branch
- Fill in the PR template by describing your changes, and referencing related issue.
- Fill in the PR template by describing your changes, and referencing related issue.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -40,4 +40,4 @@ esac\n\
exec "$@"' > /start.sh && chmod +x /start.sh

ENTRYPOINT ["/start.sh"]
CMD ["tail", "-f", "/dev/null"]
CMD ["tail", "-f", "/dev/null"]
2 changes: 1 addition & 1 deletion Makefile
@@ -23,4 +23,4 @@ tag:

release: build tag
@echo "Release workflow will be triggered by the tag push"
@echo "Distribution files are available in ./dist directory"
@echo "Distribution files are available in ./dist directory"
58 changes: 29 additions & 29 deletions README.md
@@ -51,7 +51,7 @@ pip install 'vision-parse[all]'
**Install the package from source:**

```bash
pip install 'git+https://github.com/iamarunbrahma/vision-parse.git#egg=vision-parse[all]'
pip install 'git+https://github.com/tqtensor/vision-parse.git#egg=vision-parse[all]'
```

### Setting up Ollama (Optional)
@@ -180,34 +180,34 @@ parser = VisionParser(

This package supports the following Vision LLM models:

| **Model Name** | **Provider Name** |
|:------------:|:----------:|
| gpt-4o | OpenAI |
| gpt-4o-mini | OpenAI |
| gemini-1.5-flash | Google |
| gemini-2.0-flash-exp | Google |
| gemini-1.5-pro | Google |
| llava:13b | Ollama |
| llava:34b | Ollama |
| llama3.2-vision:11b | Ollama |
| llama3.2-vision:70b | Ollama |
| deepseek-r1:32b | Ollama |
| deepseek-chat | DeepSeek |
| **Model Name** | **Provider Name** |
| :------------------: | :---------------: |
| gpt-4o | OpenAI |
| gpt-4o-mini | OpenAI |
| gemini-1.5-flash | Google |
| gemini-2.0-flash-exp | Google |
| gemini-1.5-pro | Google |
| llava:13b | Ollama |
| llava:34b | Ollama |
| llama3.2-vision:11b | Ollama |
| llama3.2-vision:70b | Ollama |
| deepseek-r1:32b | Ollama |
| deepseek-chat | DeepSeek |

## 🔧 Customization Parameters

Vision Parse offers several customization parameters to enhance document processing:

| **Parameter** | **Description** | **Value Type** |
|:---------:|:-----------:|:-------------:|
| model_name | Name of the Vision LLM model to use | str |
| custom_prompt | Define custom prompt for the model and it will be used as a suffix to the default prompt | str |
| ollama_config | Specify custom configuration for Ollama client initialization | dict |
| openai_config | Specify custom configuration for OpenAI, Azure OpenAI or DeepSeek client initialization | dict |
| gemini_config | Specify custom configuration for Gemini client initialization | dict |
| image_mode | Sets the image output format for the model i.e. if you want image url in markdown content or base64 encoded image | str |
| detailed_extraction | Enable advanced content extraction to extract complex information such as LaTeX equations, tables, images, etc. | bool |
| enable_concurrency | Enable parallel processing of multiple pages in a PDF document in a single request | bool |
| **Parameter** | **Description** | **Value Type** |
| :-----------------: | :---------------------------------------------------------------------------------------------------------------: | :------------: |
| model_name | Name of the Vision LLM model to use | str |
| custom_prompt | Define custom prompt for the model and it will be used as a suffix to the default prompt | str |
| ollama_config | Specify custom configuration for Ollama client initialization | dict |
| openai_config | Specify custom configuration for OpenAI, Azure OpenAI or DeepSeek client initialization | dict |
| gemini_config | Specify custom configuration for Gemini client initialization | dict |
| image_mode | Sets the image output format for the model i.e. if you want image url in markdown content or base64 encoded image | str |
| detailed_extraction | Enable advanced content extraction to extract complex information such as LaTeX equations, tables, images, etc. | bool |
| enable_concurrency | Enable parallel processing of multiple pages in a PDF document in a single request | bool |
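Putting the parameter table above together, a hypothetical configuration sketch — the exact constructor signature and the `convert_pdf` method name are assumptions to verify against the project docs, and API keys are expected in the environment:

```python
# Hypothetical usage sketch based on the customization-parameter table;
# guarded so it degrades gracefully when the package is not installed.
try:
    from vision_parse import VisionParser

    parser = VisionParser(
        model_name="gpt-4o",        # any model from the supported-models table
        image_mode="base64",        # or "url" for image links in the markdown
        detailed_extraction=True,   # extract LaTeX equations, tables, images
        enable_concurrency=True,    # process PDF pages in parallel
    )
    markdown_pages = parser.convert_pdf("input.pdf")  # assumed method name
except ImportError:
    print("vision-parse is not installed; run: pip install 'vision-parse[all]'")
```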

> [!TIP]
> For more details on custom model configuration i.e. `openai_config`, `gemini_config`, and `ollama_config`; please refer to [Model Configuration](docs/model_config.md).
@@ -220,11 +220,11 @@ Since there are no other ground truth data available for this task, I relied on

### Results

| Parser | Accuracy Score |
|:-------:|:---------------:|
| Vision Parse | 92% |
| MarkItDown | 67% |
| Nougat | 79% |
| Parser | Accuracy Score |
| :----------: | :------------: |
| Vision Parse | 92% |
| MarkItDown | 67% |
| Nougat | 79% |

> [!NOTE]
> I used gpt-4o model for Vision Parse to extract markdown content from the pdf documents. I have used model parameter settings as in `scoring.py` script. The above results may vary depending on the model you choose for Vision Parse and the model parameter settings.
1 change: 0 additions & 1 deletion benchmarks/benchmark_results.md
@@ -20,4 +20,3 @@ Generated on: 2025-01-03 16:15:01

- Number of runs: 3
- Individual accuracy scores: 0.52, 0.52, 0.52

42 changes: 21 additions & 21 deletions benchmarks/ground_truth.md
@@ -7,10 +7,10 @@ Quantum Computing is a revolutionary paradigm that leverages the principles of q
## Key Concepts in Quantum Computing
---

### 1. *Quantum Mechanics Basics*
Quantum computing is built on the principles of:
- **Superposition**: A qubit can represent both 0 and 1 simultaneously.
- **Entanglement**: Qubits can become interconnected, influencing each other regardless of distance.
### 1. *Quantum Mechanics Basics*
Quantum computing is built on the principles of:
- **Superposition**: A qubit can represent both 0 and 1 simultaneously.
- **Entanglement**: Qubits can become interconnected, influencing each other regardless of distance.
- **Quantum Interference**: Amplifies correct solutions and suppresses incorrect ones.

### 2. *Classical vs Quantum Computing*
@@ -26,42 +26,42 @@ Quantum computing is built on the principles of:
## Applications of Quantum Computing
---

1. **Cryptography**
- Breaking traditional encryption systems (e.g., RSA).
1. **Cryptography**
- Breaking traditional encryption systems (e.g., RSA).
- Building unbreakable quantum encryption.

2. **Healthcare**
- Drug discovery through quantum simulations.
2. **Healthcare**
- Drug discovery through quantum simulations.
- Optimizing protein folding patterns.

3. **Artificial Intelligence (AI)**
- Enhancing machine learning algorithms.
3. **Artificial Intelligence (AI)**
- Enhancing machine learning algorithms.
- Faster data processing and pattern recognition.


### Advantages and Challenges

**Advantages**
1. Exponential computational power.
2. Solves problems intractable for classical computers.
**Advantages**
1. Exponential computational power.
2. Solves problems intractable for classical computers.

**Challenges**
1. **Hardware stability**: Qubits are fragile and error-prone.
2. **Scalability**: Building large quantum systems is expensive.
**Challenges**
1. **Hardware stability**: Qubits are fragile and error-prone.
2. **Scalability**: Building large quantum systems is expensive.
3. **Software development**: Limited programming frameworks.


### Key Players in Quantum Computing
---

Some organizations leading the quantum revolution include:
- [IBM Quantum](https://www.ibm.com/quantum-computing)
- [Google Quantum AI](https://quantumai.google)
- [Microsoft Azure Quantum](https://azure.microsoft.com/en-us/services/quantum/)
Some organizations leading the quantum revolution include:
- [IBM Quantum](https://www.ibm.com/quantum-computing)
- [Google Quantum AI](https://quantumai.google)
- [Microsoft Azure Quantum](https://azure.microsoft.com/en-us/services/quantum/)
- [D-Wave](https://www.dwavesys.com)


## Conclusion
---

Quantum computing is still in its infancy but holds immense promise to redefine technology. As research progresses, it has the potential to solve problems deemed insurmountable by classical computing systems.
Quantum computing is still in its infancy but holds immense promise to redefine technology. As research progresses, it has the potential to solve problems deemed insurmountable by classical computing systems.
8 changes: 5 additions & 3 deletions benchmarks/scoring.py
@@ -1,12 +1,14 @@
import os
import statistics
from datetime import datetime
from pathlib import Path

import nltk
from nltk.translate.bleu_score import sentence_bleu
from Levenshtein import distance
from markitdown import MarkItDown
from nltk.translate.bleu_score import sentence_bleu

from vision_parse import VisionParser
import statistics
from datetime import datetime

# Download required NLTK data
nltk.download("punkt")
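The scoring script pairs NLTK's `sentence_bleu` with Levenshtein edit distance. As an illustration only — not the benchmark's actual code, which uses the `Levenshtein` package — a pure-Python sketch of a normalized edit-distance similarity:

```python
# Minimal sketch of edit-distance-based similarity scoring, assuming the
# score normalizes Levenshtein distance by the longer string's length.

def levenshtein(a: str, b: str) -> int:
    # Classic dynamic-programming edit distance, one row at a time.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            cost = 0 if ca == cb else 1
            curr.append(min(prev[j] + 1,        # deletion
                            curr[j - 1] + 1,    # insertion
                            prev[j - 1] + cost))  # substitution
        prev = curr
    return prev[len(b)]

def similarity(reference: str, candidate: str) -> float:
    # 1.0 means identical strings; 0.0 means entirely different.
    if not reference and not candidate:
        return 1.0
    return 1 - levenshtein(reference, candidate) / max(len(reference), len(candidate))

print(round(similarity("quantum computing", "quantum computing"), 2))  # 1.0
print(round(similarity("kitten", "sitting"), 2))                       # 0.57
```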
12 changes: 6 additions & 6 deletions docker-compose.yml
Expand Up @@ -9,24 +9,24 @@ services:
- MODEL_NAME=${MODEL_NAME:?MODEL_NAME is required}
environment:
- MODEL_NAME=${MODEL_NAME:?MODEL_NAME is required}
- OPENAI_API_KEY=${OPENAI_API_KEY:-} # Optional: For OpenAI models
- GEMINI_API_KEY=${GEMINI_API_KEY:-} # Optional: For Gemini models
- OPENAI_API_KEY=${OPENAI_API_KEY:-} # Optional: For OpenAI models
- GEMINI_API_KEY=${GEMINI_API_KEY:-} # Optional: For Gemini models
volumes:
- .:/app
working_dir: /app
tty: true
stdin_open: true
ports:
- "11434:11434" # Expose Ollama port
- '11434:11434' # Expose Ollama port
deploy:
resources:
limits:
memory: 16G # Set memory limit to 16GB
memory: 16G # Set memory limit to 16GB
reservations:
memory: 8G # Guarantee at least 8GB
memory: 8G # Guarantee at least 8GB
# Uncomment below lines if you have NVIDIA GPU available
# devices:
# - driver: nvidia
# count: all
# capabilities: [gpu]
command: tail -f /dev/null # Keep container running
command: tail -f /dev/null # Keep container running
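With the compose file above, a typical launch looks like the sketch below. The service name `vision-parse` is an assumption — use whatever name is defined under `services:` in the full file:

```bash
MODEL_NAME=llava:13b docker compose up -d   # MODEL_NAME is required by the compose file
docker compose exec vision-parse bash       # open a shell in the running container
```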