SoftwareDevLabs
diff --git a/‎.env.example‎
Lines changed: 468 additions & 0 deletions b/‎.env.example‎
Lines changed: 468 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎requirements-ai-processing.txt‎
Lines changed: 34 additions & 0 deletions b/‎requirements-ai-processing.txt‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎requirements-dev.txt‎
Lines changed: 6 additions & 0 deletions b/‎requirements-dev.txt‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎requirements-document-processing.txt‎
Lines changed: 33 additions & 0 deletions b/‎requirements-document-processing.txt‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎requirements-streamlit.txt‎
Lines changed: 10 additions & 0 deletions b/‎requirements-streamlit.txt‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎requirements.txt‎
Lines changed: 26 additions & 0 deletions b/‎requirements.txt‎
Lines changed: 26 additions & 0 deletions
@@ -338,3 +338,7 @@ doc/codeDocs/parsers.rst
 doc/codeDocs/parsers.database.rst
 doc/codeDocs/utils.rst
 documentation-output/
+
+# External dependencies (now managed as pip packages, reference in oss/)
+requirements_agent/docling/
+.env
@@ -0,0 +1,34 @@
+# Phase 2: AI/ML Processing Requirements
+# Advanced machine learning capabilities for document processing
+
+# Core ML/AI dependencies
+torch>=2.0.0
+transformers>=4.30.0
+sentence-transformers>=2.2.0
+datasets>=2.14.0
+
+# Computer Vision
+torchvision>=0.15.0
+Pillow>=9.5.0
+opencv-python>=4.8.0
+
+# NLP and Language Processing
+spacy>=3.6.0
+nltk>=3.8.0
+textblob>=0.17.1
+
+# Vector Operations and Embeddings
+numpy>=1.24.0
+faiss-cpu>=1.7.4  # For similarity search
+scikit-learn>=1.3.0
+
+# Advanced Document Understanding
+layoutparser>=0.3.4
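+detectron2>=0.6  # For layout analysis. NOTE(review): detectron2 is normally installed from source (git+https://github.com/facebookresearch/detectron2.git), not from PyPI - confirm this line resolves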
+
+# Optional GPU support (user can upgrade)
+# CUDA-enabled torch build (from the PyTorch wheel index) - user should install manually if needed
+
+# Development and Testing (already in requirements-dev.txt)
+# pytest>=7.4.0
+# pytest-cov>=4.1.0
@@ -8,3 +8,9 @@ python-dotenv==1.0.1
 PyYAML==6.0.1
 types-PyYAML==6.0.12.20240917
 pre-commit==3.7.1
+google-generativeai>=0.3.0  # For Gemini LLM support
+
+# ML and monitoring dependencies
+scikit-learn>=1.3.0  # For ML-based tagging
+numpy>=1.24.0  # For numerical operations
+pandas>=2.0.0  # For data analysis (optional)
@@ -0,0 +1,33 @@
+# Document Processing Dependencies - Phase 1 (Core)
+# Essential dependencies for basic document processing functionality
+
+# Core document processing
+docling>=1.0.0
+docling-core>=1.0.0
+
+# PDF processing
+PyPDF2>=3.0.0
+pdfplumber>=0.9.0
+
+# Office document support
+python-docx>=0.8.11
+python-pptx>=0.6.21
+
+# HTML/XML processing
+beautifulsoup4>=4.12.0
+lxml>=4.9.0
+
+# Image processing (for OCR)
+Pillow>=9.0.0
+
+# Text processing utilities
+markdown>=3.4.0
+chardet>=5.0.0
+
+# Optional: Advanced ML features (Phase 2)
+# Uncomment these for AI-enhanced document processing:
+# torch>=2.0.0
+# transformers>=4.30.0
+# sentence-transformers>=2.2.0
+# easyocr>=1.7.0
+# layoutparser>=0.3.4
@@ -0,0 +1,10 @@
+# Streamlit UI Dependencies
+# Install with: pip install -r requirements-streamlit.txt
+
+streamlit>=1.28.0
+markdown>=3.5.0
+pandas>=2.0.0
+pyyaml>=6.0.0
+
+# Optional for enhanced features
+plotly>=5.17.0  # For interactive charts
@@ -7,3 +7,29 @@ fastapi==0.117.1
 uvicorn==0.37.0
 psycopg2-binary==2.9.10
 PyYAML==6.0.2
+
+# Basic text processing
+requests>=2.28.0
+
+# Optional document processing (install with: pip install -r requirements-document-processing.txt)
+# docling>=1.0.0
+# PyPDF2>=3.0.0
+
+# Phase 3: Advanced LLM Integration (Optional)
+# Install these for full Phase 3 capabilities:
+
+# LLM Client Libraries (uncomment to enable)
+# openai>=1.0.0
+# anthropic>=0.3.0
+
+# Advanced NLP and ML (uncomment to enable)
+# sentence-transformers>=2.2.0
+# scikit-learn>=1.3.0
+# numpy>=1.24.0
+
+# Graph and Network Analysis (uncomment to enable)
+# networkx>=3.0
+
+# Note: Phase 3 components degrade gracefully: they still
+# work, with limited functionality, if these optional
+# dependencies are not installed.