diff --git a/.github/workflows/deepeval-tests.yml b/.github/workflows/deepeval-tests.yml
index 7c33ad8..f26d741 100644
--- a/.github/workflows/deepeval-tests.yml
+++ b/.github/workflows/deepeval-tests.yml
@@ -3,7 +3,7 @@ name: DeepEval RAG System Tests
 on:
   pull_request:
     types: [opened, synchronize, reopened]
-    branches: ["rag-33-debug", "RAG-33-31okt", "wip_3_12"]
+    branches: ["RAG-217-debug"]
     paths:
       - 'src/**'
       - 'tests/**'
@@ -94,7 +94,7 @@ jobs:
           2. Add the missing secrets with the appropriate values:

           **Azure OpenAI Configuration:**
-          - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
+          - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
           - \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key
           - \`AZURE_OPENAI_DEPLOYMENT\` - Chat model deployment name (e.g., \`gpt-4o-mini\`)
           - \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`)
@@ -193,8 +193,8 @@ jobs:
          AZURE_STORAGE_BLOB_NAME: ${{ secrets.AZURE_STORAGE_BLOB_NAME }}
          EVAL_MODE: "true"
        run: |
-          # Run tests with testcontainers managing Docker Compose
-          uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO
+          # Run tests sequentially (one at a time) to avoid rate limiting
+          uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO -n 0

      - name: Fix permissions on test artifacts
        if: always()
diff --git a/docker-compose-eval.yml b/docker-compose-eval.yml
index 2c6aadf..ba4cb12 100644
--- a/docker-compose-eval.yml
+++ b/docker-compose-eval.yml
@@ -56,7 +56,7 @@ services:
       - vault
     volumes:
       - ./test-vault/agents/llm:/agent/in
-      - ./test-vault/agent-out:/agent/out
+      - ./test-vault/agent-out:/agent/llm-token
     entrypoint: ["sh", "-c"]
     command:
       - |
@@ -254,12 +254,13 @@ services:
       - "8100:8100"
     environment:
       - VAULT_ADDR=http://vault:8200
-      - VAULT_TOKEN_FILE=/agent/out/token
+      - VAULT_TOKEN_FILE=/agent/llm-token/token
+      - ENVIRONMENT=development
       - QDRANT_URL=http://qdrant:6333
       - EVAL_MODE=true
     volumes:
       - ./src/llm_config_module/config:/app/src/llm_config_module/config:ro
-      - ./test-vault/agent-out:/agent/out:ro
+      - ./test-vault/agent-out:/agent/llm-token:ro
       - test_llm_orchestration_logs:/app/logs
     depends_on:
       - qdrant
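Note on the volume rename above: the orchestration service locates the agent's token sink through the VAULT_TOKEN_FILE environment variable, so the mount path and the env var have to move from /agent/out to /agent/llm-token together. A minimal sketch of the consuming side, assuming a helper of our own naming (read_vault_token is hypothetical; only the env var and the default path come from the compose file above):

    import os
    from pathlib import Path

    def read_vault_token() -> str:
        """Read the token written by the Vault agent's file sink."""
        # VAULT_TOKEN_FILE is set in docker-compose-eval.yml; the default mirrors
        # the new mount point (/agent/llm-token) rather than the old /agent/out.
        token_path = Path(os.environ.get("VAULT_TOKEN_FILE", "/agent/llm-token/token"))
        return token_path.read_text(encoding="utf-8").strip()

If the env var and the bind mount drift apart, the service fails at startup with a missing-file error, which is why both sides of the rename appear in the same compose diff.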
diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py
index abef9a1..931bdd0 100644
--- a/src/llm_orchestration_service.py
+++ b/src/llm_orchestration_service.py
@@ -2192,7 +2192,7 @@ def _generate_rag_response(
             },
             output=answer,
         )
-
+
         retrieval_context: List[Dict[str, Any]] | None = None
         if eval_mode and relevant_chunks:
             max_blocks_used = ResponseGenerationConstants.DEFAULT_MAX_BLOCKS
@@ -2230,16 +2230,16 @@
                 chunks=None,  # No chunks when question is out of scope
             )
         else:
-            response = OrchestrationResponse(
+            response = OrchestrationResponse(
                 chatId=request.chatId,
                 llmServiceActive=True,  # service OK; insufficient context
                 questionOutOfLLMScope=True,
                 inputGuardFailed=False,
                 content=localized_msg,
             )
-            if eval_mode:
+            if eval_mode:
                 response.retrieval_context = retrieval_context
-            return response
+            return response

         # In-scope: return the answer as-is (NO citations)
         logger.info("Returning in-scope answer without citations.")
@@ -2264,14 +2264,14 @@
                 chunks=self._format_chunks_for_test_response(relevant_chunks),
             )
         else:
-            response = OrchestrationResponse(
+            response = OrchestrationResponse(
                 chatId=request.chatId,
                 llmServiceActive=True,
                 questionOutOfLLMScope=False,
                 inputGuardFailed=False,
                 content=content_with_refs,
             )
-            if eval_mode:
+            if eval_mode:
                 response.retrieval_context = retrieval_context
         return response
diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py
index 2f7edee..c171763 100644
--- a/src/llm_orchestration_service_api.py
+++ b/src/llm_orchestration_service_api.py
@@ -1,4 +1,5 @@
 """LLM Orchestration Service API - FastAPI application."""
+
 import os
 from contextlib import asynccontextmanager
 from typing import Any, AsyncGenerator, Dict
@@ -40,7 +41,7 @@
     ContextGenerationRequest,
     ContextGenerationResponse,
     EmbeddingErrorResponse,
-    DeepEvalTestOrchestrationResponse
+    DeepEvalTestOrchestrationResponse,
 )


diff --git a/src/models/request_models.py b/src/models/request_models.py
index 539d708..82a64b9 100644
--- a/src/models/request_models.py
+++ b/src/models/request_models.py
@@ -278,4 +278,4 @@ class DeepEvalTestOrchestrationResponse(BaseModel):
     content: str
     retrieval_context: Optional[List[Dict[str, Any]]] = None
     refined_questions: Optional[List[str]] = None
-    expected_output: Optional[str] = None  # For DeepEval
\ No newline at end of file
+    expected_output: Optional[str] = None  # For DeepEval
diff --git a/test-vault/agents/llm/agent.hcl b/test-vault/agents/llm/agent.hcl
index 9883bfe..69ffde4 100644
--- a/test-vault/agents/llm/agent.hcl
+++ b/test-vault/agents/llm/agent.hcl
@@ -3,7 +3,7 @@ vault {
   address = "http://vault:8200"
 }

-pid_file = "/agent/out/pidfile"
+pid_file = "/agent/llm-token/pidfile"

 auto_auth {
   method "approle" {
@@ -17,7 +17,7 @@ auto_auth {
   sink "file" {
     config = {
-      path = "/agent/out/token"
+      path = "/agent/llm-token/token"
     }
   }
 }
@@ -36,7 +36,7 @@ listener "tcp" {
 # dummy template so cache is “active” (some versions require this)
 template {
   source      = "/dev/null"
-  destination = "/agent/out/dummy"
+  destination = "/agent/llm-token/dummy"
 }

 # Disable API proxy; not needed here
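For context on the eval-mode branches above: the service attaches the retrieved chunks to the response object only when EVAL_MODE is on, so production responses stay unchanged while DeepEval gets its retrieval context. A rough sketch of the pattern in isolation, assuming a Pydantic model with the field names shown in the diff (the respond helper is hypothetical; nothing here is the service's real code path):

    from typing import Any, Dict, List, Optional
    from pydantic import BaseModel

    class OrchestrationResponse(BaseModel):
        chatId: str
        llmServiceActive: bool
        questionOutOfLLMScope: bool
        inputGuardFailed: bool
        content: str
        retrieval_context: Optional[List[Dict[str, Any]]] = None

    def respond(
        chat_id: str, answer: str, chunks: List[Dict[str, Any]], eval_mode: bool
    ) -> OrchestrationResponse:
        response = OrchestrationResponse(
            chatId=chat_id,
            llmServiceActive=True,
            questionOutOfLLMScope=False,
            inputGuardFailed=False,
            content=answer,
        )
        if eval_mode:
            # Only evaluation runs carry the retrieval context back to the caller.
            response.retrieval_context = chunks
        return response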
diff --git a/tests/conftest.py b/tests/conftest.py
index b67d114..90d9c8f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -16,14 +16,11 @@
 def download_embeddings_from_azure(
-    connection_string: str,
-    container_name: str,
-    blob_name: str,
-    local_path: Path
+    connection_string: str, container_name: str, blob_name: str, local_path: Path
 ) -> None:
     """
     Download pre-computed embeddings from Azure Blob Storage.
-
+
     Args:
         connection_string: Azure Storage connection string
         container_name: Name of the blob container
@@ -34,28 +31,29 @@ def download_embeddings_from_azure(
     logger.info(f"  Container: {container_name}")
     logger.info(f"  Blob: {blob_name}")
     logger.info(f"  Local path: {local_path}")
-
+
     try:
         # Create BlobServiceClient
-        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
-
+        blob_service_client = BlobServiceClient.from_connection_string(
+            connection_string
+        )
+
         # Get blob client
         blob_client = blob_service_client.get_blob_client(
-            container=container_name,
-            blob=blob_name
+            container=container_name, blob=blob_name
         )
-
+
         # Ensure parent directory exists
         local_path.parent.mkdir(parents=True, exist_ok=True)
-
+
         # Download the blob
         with open(local_path, "wb") as download_file:
             download_stream = blob_client.download_blob()
             download_file.write(download_stream.readall())
-
+
         file_size_kb = local_path.stat().st_size / 1024
         logger.info(f"✓ Downloaded embeddings successfully ({file_size_kb:.2f} KB)")
-
+
     except Exception as e:
         logger.error(f"Failed to download embeddings from Azure: {e}")
         raise
@@ -144,10 +142,10 @@ def start(self) -> None:
         """Start all test containers and bootstrap Vault"""
         logger.info("Starting RAG Stack testcontainers...")
         os.environ["EVAL_MODE"] = "true"
-
+
         # Download embeddings from Azure before starting containers
         self._download_embeddings_from_azure()
-
+
         # Prepare Vault Agent directories
         agent_in = self.project_root / "test-vault" / "agents" / "llm"
         agent_out = self.project_root / "test-vault" / "agent-out"
@@ -212,15 +210,15 @@ def _download_embeddings_from_azure(self) -> None:
         connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
         container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME", "test-embeddings")
         blob_name = os.getenv("AZURE_STORAGE_BLOB_NAME", "test_embeddings.json")
-
+
         # Local path where embeddings should be saved
         embeddings_file = self.project_root / "tests" / "data" / "test_embeddings.json"
-
+
         # Skip if embeddings already exist locally (for local development)
-        #if embeddings_file.exists():
+        # if embeddings_file.exists():
         #    logger.info("Embeddings file already exists locally, skipping Azure download")
         #    return
-
+
         # Require Azure configuration for CI/CD
         if not connection_string:
             raise ValueError(
@@ -228,17 +226,17 @@ def _download_embeddings_from_azure(self) -> None:
                 "Either set this environment variable or ensure test_embeddings.json "
                 f"exists at {embeddings_file}"
             )
-
+
         logger.info("=" * 80)
         logger.info("DOWNLOADING EMBEDDINGS FROM AZURE BLOB STORAGE")
         logger.info("=" * 80)
-
+
         try:
             download_embeddings_from_azure(
                 connection_string=connection_string,
                 container_name=container_name,
                 blob_name=blob_name,
-                local_path=embeddings_file
+                local_path=embeddings_file,
             )
             logger.info("Embeddings download complete")
         except Exception as e:
@@ -594,7 +592,7 @@ def _fix_token_file_permissions(
                     "vault-agent-llm",
                     "chmod",
                     "644",
-                    "/agent/out/token",
+                    "/agent/llm-token/token",
                 ],
                 capture_output=True,
                 text=True,
@@ -797,8 +795,9 @@ def orchestration_client(rag_stack: RAGStackTestContainers):
     Function-scoped fixture that provides the orchestration service URL.
     Tests can use either requests (sync) or httpx (async).
     """
+
     class OrchestrationClient:
         def __init__(self, base_url: str):
             self.base_url = base_url
-
-    return OrchestrationClient(rag_stack.get_orchestration_service_url())
\ No newline at end of file
+
+    return OrchestrationClient(rag_stack.get_orchestration_service_url())
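The conftest changes above make the Azure download mandatory whenever no local embeddings file exists. For local development the same helper can be driven directly; a small sketch using the exact signature and env var names from the diff (the import path assumes tests/ is importable from the repo root; otherwise copy the helper):

    import os
    from pathlib import Path

    from tests.conftest import download_embeddings_from_azure

    # Mirrors what _download_embeddings_from_azure() does inside the fixture.
    download_embeddings_from_azure(
        connection_string=os.environ["AZURE_STORAGE_CONNECTION_STRING"],
        container_name=os.getenv("AZURE_STORAGE_CONTAINER_NAME", "test-embeddings"),
        blob_name=os.getenv("AZURE_STORAGE_BLOB_NAME", "test_embeddings.json"),
        local_path=Path("tests/data/test_embeddings.json"),
    )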
""" + class OrchestrationClient: def __init__(self, base_url: str): self.base_url = base_url - - return OrchestrationClient(rag_stack.get_orchestration_service_url()) \ No newline at end of file + + return OrchestrationClient(rag_stack.get_orchestration_service_url()) diff --git a/tests/data/test_dataset.json b/tests/data/test_dataset.json index 0b09b6e..e94b012 100644 --- a/tests/data/test_dataset.json +++ b/tests/data/test_dataset.json @@ -11,24 +11,6 @@ "category": "digital_identity_security", "language": "et" }, - { - "input": "Mida teha, kui mu ID-kaart on kadunud", - "expected_output": "Kui sinu ID-kaart on kadunud, tuleks tegutseda järgmiselt: Mõtle läbi, kas kaart võib olla turvalises kohas – näiteks kontorilaual või mujal, kus seda hiljuti kasutasid. Peata ID-kaardiga seotud sertifikaadid, helistades numbril +372 677 3377. Sertifikaatide peatamine tähendab, et kaarti ei saa kasutada elektroonilistes kanalites, kuid see kehtib endiselt füüsilise isikut tõendava dokumendina (nt reisidokumendina). Kui oled kindel, et kaart on varastatud, teavita politseid ja lase ID-kaart kehtetuks tunnistada. Kui kaardi kadumine toimub välismaal, saad Eesti välisesindusest taotleda ajutise tagasipöördumise tunnistuse. Kui vajad täiendavat abi, võid pöörduda Politsei- ja Piirivalveameti teenindusse.", - "category": "digital_identity_security", - "language": "et" - }, - { - "input": "Millised mobiilioperaatorid pakuvad Mobiil-ID-d Eestis?", - "expected_output": "Mobiil-ID SIM-kaarte Eestis väljustavad kolm mobiilioperaatorit: Telia, Elisa ja Tele2. Mobiil-ID SIM-kaarte saab taotleda nende operaatorite esindustes. Igal operaatoril on Mobiil-ID väljastamiseks oma reeglid, näiteks vanusepiirangud, teenusetasud ja kasutustingimused, kuid riiklikud nõuded on kõigile ühesugused. Kõik operaatorid peavad järgima samu riiklikke nõudeid, tagades Mobiil-ID ohutuse ja ühesuguse toimimise sõltumata kasutatavast operaatorist.", - "category": "digital_identity", - "language": "et" - }, - { - "input": "Kuidas taotleda uut ID-kaardi koodiümbrikku?", - "expected_output": "Uue koodiümbriku taotlemiseks tuleb pöörduda Politsei- ja Piirivalveameti teenindusse või Eesti Vabariigi välisesindusse. Kaasa tuleb võtta isikut tõendav dokument. Võimalusel valida mobiilne pilet enne teeninduse külastamist. Maksa riigilõiv, mis rakendub koodiümbriku väljastamisel. Riigilõivu määrad leiad Politsei- ja Piirivalveameti kodulehelt. ui sinu ID-kaart on väljastatud pärast 17.11.2025, saad PUK-koodi vaadata ka Politsei- ja Piirivalveameti iseteenindusest, valides teenuse “ID-kaardi PUK-i vaatamine“", - "category": "id_card_usage", - "language": "et" - }, { "input": "Mis on eIDAS määrus?", "expected_output": "eIDAS määrus (electronic IDentification, Authentication and trust Services) on Euroopa Liidus kehtiv e-identimise ja e-tehingute määrus, mille eesmärk on lihtsustada piiriülest elektrooniliste teenuste tarbimist ühtsustatud standardite ja tegutsemispõhimõtete kaudu. Määrus võeti vastu 23. juulil 2014 ja alates 1. 
juulist 2016 peavad Euroopa Liidu riigid tunnustama teineteise e-allkirju ning Eesti kodanike digiallkirju peab aktsepteerima ka teiste EL liikmesriikide avaliku sektori asutused.", @@ -41,36 +23,6 @@ "category": "digital_identity", "language": "en" }, - { - "input": "What is an EORI number and do I need one?", - "expected_output": "An EORI (Economic Operators Registration and Identification) number is mandatory for businesses engaged in foreign trade when importing goods, transit shipments, exporting, or performing other customs operations. If you're doing any cross-border trade within or outside the EU, you need an EORI number. To get one: In Estonia, the Tax and Customs Board issues EORI numbers. You can apply electronically through the e-MTA service portal at https://maasikas.emta.ee/login/?lang=et. After logging in, select \"Customs\" → \"Rights and obligations\" → \"Economic Operators Registration and Identification number (EORI)\". You need access rights to submit an application - see the access rights section for details. If you don't have access to e-MTA, you can submit your application at a Tax and Customs Board service location.", - "category": "emta", - "language": "en" - }, - { - "input": "How do I check if a business partner is trustworthy before doing business with them?", - "expected_output": "You should verify your business partner's background through multiple public databases and sources. This helps you avoid tax fraud chains and ensure your partner can fulfill their obligations. Here's how to check them: Essential checks: VAT registration: Check if they have a valid Estonian VAT number (KMKR) at https://apps.emta.ee/saqu/public/kmkrnr?lang=et. Without a valid VAT number, the seller cannot add VAT to transactions and the buyer cannot deduct input VAT. EU VAT number: For EU partners, verify their VAT number at https://ec.europa.eu/taxation_customs/vies/#/vat-validation. Check that the name matches the VAT number. Tax debt: Check for tax debts at https://apps.emta.ee/saqu/public/taxdebt?lang=et. Tax debt may indicate payment difficulties or problems meeting deadlines. Public data query: Get comprehensive company information at https://maasikas.emta.ee/rating/search including address, declared turnover, paid taxes, number of employees, outstanding declarations, and business bans. Business Register: Check company details at https://ariregister.rik.ee/ including registration code, legal form, share capital, and annual reports. Additional verification: Request tax data certificates directly through e-MTA - partners can send these showing their labor, salary, turnover, and tax debt data Check paid taxes tables to see how much they've paid in taxes and their declared turnover Search online for articles, forum discussions, payment default registers, and court decisions about the partner Check the Economic Activities Register (MTR) at https://mtr.ttja.ee/ for licenses and permits Use credit info databases like Inforegister, Creditinfo Estonia, or Äripäeva Infopank for credit ratings and payment behavior", - "category": "emta", - "language": "en" - }, - { - "input": "Kas ma saan kontrollida, kas minu äripartneril on karistusregister?", - "expected_output": "Jah, saate kontrollida karistusregistrit aadressil http://www.rik.ee/et/karistusregister 4 euro eest. Register sisaldab andmeid karistatud isikute (sh juriidiliste isikute) ja nende karistuste kohta. 
Kui registrist nähtub varasemaid maksualaseid süütegusid, on see riskitegur, et nendega äri tehes võite sattuda maksupettuse ahelasse. Korduvad rikkumised võivad viidata madalale seaduskuulekusele ja muuta tehingud riskantseks.", - "category": "emta", - "language": "et" - }, - { - "input": "Как проверить, заслуживает ли деловой партнер доверия, перед началом сотрудничества?", - "expected_output": "Вам следует проверить информацию о вашем деловом партнере через несколько публичных баз данных и источников. Это поможет избежать цепочек налогового мошенничества и убедиться, что партнер может выполнить свои обязательства. Как их проверить: Основные проверки: Регистрация НДС: Проверьте, есть ли у них действительный эстонский номер плательщика НДС (KMKR) на сайте https://apps.emta.ee/saqu/public/kmkrnr?lang=et. Без действительного номера плательщика НДС продавец не имеет права добавлять НДС к сделке, а покупатель не может вычесть входящий НДС. Номер НДС ЕС: Для партнеров из ЕС проверьте номер НДС на сайте https://ec.europa.eu/taxation_customs/vies/#/vat-validation. Убедитесь, что название соответствует номеру НДС. Налоговая задолженность: Проверьте наличие налоговых долгов на сайте https://apps.emta.ee/saqu/public/taxdebt?lang=et. Налоговая задолженность может указывать на платежные трудности или проблемы с соблюдением сроков. Запрос публичных данных: Получите подробную информацию о компании на сайте https://maasikas.emta.ee/rating/search, включая адрес, декларированный оборот, уплаченные налоги, количество сотрудников, непредставленные декларации и запреты на деятельность. Коммерческий регистр: Проверьте данные компании на сайте https://ariregister.rik.ee/, включая регистрационный код, юридическую форму, уставный капитал и годовые отчеты. Дополнительная проверка: Запросите справки о налоговых данных напрямую через e-MTA - партнеры могут отправлять их, показывая данные о рабочей силе, зарплате, обороте и налоговой задолженности Проверьте таблицы уплаченных налогов, чтобы увидеть, сколько они заплатили налогов и декларировали оборот Поищите в интернете статьи, темы обсуждений на форумах, результаты регистра платежных нарушений и судебные решения о партнере Проверьте Регистр экономической деятельности (MTR) на сайте https://mtr.ttja.ee/ на наличие лицензий и разрешений Используйте базы данных кредитной информации, такие как Inforegister, Creditinfo Estonia или Äripäeva Infopank для кредитных рейтингов и платежного поведения", - "category": "emta", - "language": "ru" - }, - { - "input": "Kus saab ID-kaarti isikut tõendava dokumendina kasutada?", - "expected_output": "ID-kaarti saab kasutada isikut tõendava dokumendina, digitaalsete teenuste kasutamiseks, nagu internetipank ja e-maksuamet, samuti digitaalse allkirja andmiseks ja dokumentide krüpteerimiseks. Samuti on võimalik seda kasutada Euroopa Liidu ja Euroopa Majanduspiirkonna piires reisidokumendina.", - "category": "id_card_usage", - "language": "et" - }, { "input": "Kuidas aktiveerida Mobiil-ID?", "expected_output": "Mobiil-ID aktiveerimine toimub operaatorite iseteeninduses (Telia, Elisa, Tele2).", @@ -95,18 +47,6 @@ "category": "ttja", "language": "en" }, - { - "input": "Mis on .asice ja .bdoc failide vahe?", - "expected_output": "Mõlemad on BDOC digiallkirjaformaadi alamformaadid. .asice (ASiC-E LT) on BDOC allkiri ajatempliga ja on parima rahvusvahelise ühilduvusega. DigiDoc4 kasutab kõikide uute dokumentide allkirjastamisel automaatselt .asice vormingut. 
.bdoc formaati kasutati varem, kuid uued allkirjad luuakse .asice formaadis.", - "category": "digital_signature", - "language": "et" - }, - { - "input": "How long is the e-residency digi-ID valid for?", - "expected_output": "The e-residency digi-ID is valid for 5 years", - "category": "digital_identity", - "language": "en" - }, { "input": "Предоставляет ли электронное резидентство эстонское гражданство или налоговое резидентство?", "expected_output": "Нет, электронное резидентство не предоставляет эстонское гражданство или налоговое резидентство.", diff --git a/tests/deepeval_tests/report_generator.py b/tests/deepeval_tests/report_generator.py index 0db45ac..398d184 100644 --- a/tests/deepeval_tests/report_generator.py +++ b/tests/deepeval_tests/report_generator.py @@ -161,13 +161,10 @@ def generate_failure_analysis(results: Dict[str, Any]) -> str: if len(failure["input"]) > 50 else failure["input"] ) - reason_preview = ( - failure["reason"] ) + reason_preview = failure["reason"] analysis += f"| {failure['test_case']} | {query_preview} | {failure['metric']} | {failure['score']:.2f} | {reason_preview} |\n" - - analysis += "\n" return analysis diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py index 19d4b33..fa542e2 100644 --- a/tests/deepeval_tests/standard_tests.py +++ b/tests/deepeval_tests/standard_tests.py @@ -113,6 +113,7 @@ def save_results_fixture(): import httpx # Replace requests with httpx import asyncio + class TestRAGSystem: """Test suite for RAG system evaluation using DeepEval metrics via API.""" @@ -177,7 +178,10 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client except httpx.RequestError as e: result = {"content": f"API Error: {str(e)}", "retrieval_context": []} except Exception as e: - result = {"content": f"Unexpected error: {str(e)}", "retrieval_context": []} + result = { + "content": f"Unexpected error: {str(e)}", + "retrieval_context": [], + } if result is None: result = {"content": "No response received", "retrieval_context": []} # --- DEBUG LOGGING --- @@ -186,20 +190,23 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client print("=" * 80) print(f"Response keys: {list(result.keys())}") for key, value in result.items(): - print(key, value) print(f"Content length: {len(result.get('content', ''))}") - print(f"Retrieval context: {len(result.get('retrieval_context', []))} chunks") - - if result.get('retrieval_context'): - for chunk in result['retrieval_context']: + print(f"Retrieval context: {len(result.get('retrieval_context') or [])} chunks") + + if result.get("retrieval_context"): + for chunk in result["retrieval_context"]: print(chunk.keys()) - context = chunk.get('content', '') if isinstance(chunk, dict) else str(chunk) - meta = chunk.get('metadata', {}) if isinstance(chunk, dict) else {} - fused_score = meta.get('fused_score', 'N/A') - bm25_score = meta.get('bm25_score', 'N/A') - semantic_score = meta.get('semantic_score', 'N/A') - print(f"Chunk (fused: {fused_score}, bm25: {bm25_score}, semantic: {semantic_score}):\n {context}\n\n") + context = ( + chunk.get("content", "") if isinstance(chunk, dict) else str(chunk) + ) + meta = chunk.get("metadata", {}) if isinstance(chunk, dict) else {} + fused_score = meta.get("fused_score", "N/A") + bm25_score = meta.get("bm25_score", "N/A") + semantic_score = meta.get("semantic_score", "N/A") + print( + f"Chunk (fused: {fused_score}, bm25: {bm25_score}, semantic: {semantic_score}):\n {context}\n\n" + ) else: print("WARNING: 
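The report change above passes the failure reason through untruncated, while query_preview stays capped at 50 characters. If long reasons ever make the markdown table unwieldy, both cells could share one capping helper; a sketch (truncate_preview is hypothetical, the 50-character limit mirrors the existing query_preview logic):

    def truncate_preview(text: str, limit: int = 50) -> str:
        """Cap a table-cell preview, mirroring the query_preview logic above."""
        return text if len(text) <= limit else text[: limit - 3] + "..."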
diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py
index 19d4b33..fa542e2 100644
--- a/tests/deepeval_tests/standard_tests.py
+++ b/tests/deepeval_tests/standard_tests.py
@@ -113,6 +113,7 @@ def save_results_fixture():
 import httpx  # Replace requests with httpx
 import asyncio

+
 class TestRAGSystem:
     """Test suite for RAG system evaluation using DeepEval metrics via API."""

@@ -177,7 +178,10 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client
         except httpx.RequestError as e:
             result = {"content": f"API Error: {str(e)}", "retrieval_context": []}
         except Exception as e:
-            result = {"content": f"Unexpected error: {str(e)}", "retrieval_context": []}
+            result = {
+                "content": f"Unexpected error: {str(e)}",
+                "retrieval_context": [],
+            }
         if result is None:
             result = {"content": "No response received", "retrieval_context": []}
         # --- DEBUG LOGGING ---
@@ -186,20 +190,23 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client
         print("=" * 80)
         print(f"Response keys: {list(result.keys())}")
-        for key, value in result.items():
-            print(key, value)
         print(f"Content length: {len(result.get('content', ''))}")
-        print(f"Retrieval context: {len(result.get('retrieval_context', []))} chunks")
-
-        if result.get('retrieval_context'):
-            for chunk in result['retrieval_context']:
+        print(f"Retrieval context: {len(result.get('retrieval_context') or [])} chunks")
+
+        if result.get("retrieval_context"):
+            for chunk in result["retrieval_context"]:
                 print(chunk.keys())
-                context = chunk.get('content', '') if isinstance(chunk, dict) else str(chunk)
-                meta = chunk.get('metadata', {}) if isinstance(chunk, dict) else {}
-                fused_score = meta.get('fused_score', 'N/A')
-                bm25_score = meta.get('bm25_score', 'N/A')
-                semantic_score = meta.get('semantic_score', 'N/A')
-                print(f"Chunk (fused: {fused_score}, bm25: {bm25_score}, semantic: {semantic_score}):\n  {context}\n\n")
+                context = (
+                    chunk.get("content", "") if isinstance(chunk, dict) else str(chunk)
+                )
+                meta = chunk.get("metadata", {}) if isinstance(chunk, dict) else {}
+                fused_score = meta.get("fused_score", "N/A")
+                bm25_score = meta.get("bm25_score", "N/A")
+                semantic_score = meta.get("semantic_score", "N/A")
+                print(
+                    f"Chunk (fused: {fused_score}, bm25: {bm25_score}, semantic: {semantic_score}):\n  {context}\n\n"
+                )
         else:
             print("WARNING: No retrieval context returned!")
         print("=" * 80)
@@ -242,10 +249,15 @@ async def run_metric(metric_name, metric):
                     "reason": f"Error: {str(e)}",
                 }

-        metric_results_list = await asyncio.gather(
-            *(run_metric(name, metric) for name, metric in metrics)
-        )
-        metrics_results = dict(metric_results_list)
+        # Run metrics sequentially with delays to avoid rate limiting
+        metrics_results = {}
+        for i, (name, metric) in enumerate(metrics):
+            print(f"  Running {name} metric...")
+            result_name, result_data = await run_metric(name, metric)
+            metrics_results[result_name] = result_data
+            # Add delay between metrics to respect rate limits (except after last metric)
+            if i < len(metrics) - 1:
+                await asyncio.sleep(15)  # 15 second delay for Azure S0 tier rate limits

         # --- Collect results ---
         try:
@@ -262,4 +274,6 @@ async def run_metric(metric_name, metric):
         # --- Assert ---
         failed = [name for name, res in metrics_results.items() if not res["passed"]]
         if failed:
-            pytest.fail(f"Metrics failed: {', '.join(failed)} for input: {test_item['input'][:50]}")
+            pytest.fail(
+                f"Metrics failed: {', '.join(failed)} for input: {test_item['input'][:50]}"
+            )
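The fixed 15-second sleep above is tuned to the Azure S0 tier quota. If quotas change, the usual generalization is a retry-with-backoff wrapper around each metric instead of a flat delay; a sketch under that assumption (run_metric mimics the coroutine shape used in the test; this is not the test's actual code):

    import asyncio
    from typing import Any, Awaitable, Callable, Dict, Tuple

    async def run_with_backoff(
        run_metric: Callable[[str, Any], Awaitable[Tuple[str, Dict[str, Any]]]],
        name: str,
        metric: Any,
        retries: int = 3,
        base_delay: float = 15.0,
    ) -> Tuple[str, Dict[str, Any]]:
        """Retry a single metric, spacing attempts out when the quota rejects us."""
        for attempt in range(retries):
            try:
                return await run_metric(name, metric)
            except Exception:
                if attempt == retries - 1:
                    raise
                # 15s, 30s, 60s: exponential backoff between attempts.
                await asyncio.sleep(base_delay * (2 ** attempt))
        raise RuntimeError("unreachable")

A flat sleep wastes time when the quota has headroom; backoff only pays the delay when a call actually fails, at the cost of retrying work.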
diff --git a/tests/helpers/test_data_loader.py b/tests/helpers/test_data_loader.py
index 8296798..f95c203 100644
--- a/tests/helpers/test_data_loader.py
+++ b/tests/helpers/test_data_loader.py
@@ -1,4 +1,5 @@
 """Helper module to load test data into Qdrant before running tests."""
+
 import json
 import uuid
 from typing import List, Dict, Any, Tuple
@@ -17,63 +18,59 @@ def load_test_data_into_qdrant(
     # Load pre-computed embeddings
     embeddings_file = Path(__file__).parent.parent / "data" / "test_embeddings.json"
-
+
     if not embeddings_file.exists():
         raise FileNotFoundError(
             f"Pre-computed embeddings not found at {embeddings_file}. "
             "Run create_embeddings.py first!"
         )
-
+
     logger.info(f"Loading pre-computed embeddings from {embeddings_file}")
     chunks_data, model_used = load_precomputed_embeddings(embeddings_file)
-
+
     # Index into Qdrant
     index_embeddings_to_qdrant(
-        qdrant_url=qdrant_url,
-        chunks_data=chunks_data,
-        model_used=model_used
+        qdrant_url=qdrant_url, chunks_data=chunks_data, model_used=model_used
     )


 def load_precomputed_embeddings(
-    embeddings_file: Path
+    embeddings_file: Path,
 ) -> Tuple[List[Dict[str, Any]], str]:
     """Load pre-computed embeddings from file."""
     with open(embeddings_file, "r", encoding="utf-8") as f:
         data = json.load(f)
-
+
     chunks_data = data["chunks"]
     model_used = data["model_used"]
-
+
     logger.info(f"Loaded {len(chunks_data)} pre-computed chunks")
     logger.info(f"  Vector size: {data['vector_size']}")
     logger.info(f"  Model: {model_used}")
     logger.info(f"  Documents: {data['total_documents']}")
-
+
     return chunks_data, model_used


 def index_embeddings_to_qdrant(
-    qdrant_url: str,
-    chunks_data: List[Dict[str, Any]],
-    model_used: str
+    qdrant_url: str, chunks_data: List[Dict[str, Any]], model_used: str
 ) -> None:
     """Index embeddings into Qdrant."""
     if not chunks_data:
         logger.warning("No chunks to index")
         return
-
+
     vector_size = chunks_data[0]["vector_dimensions"]
     collection_name = _determine_collection_from_model(model_used)
-
+
     logger.info(f"Indexing into Qdrant collection: {collection_name}")
-
+
     client = httpx.Client(timeout=30.0)
-
+
     try:
         # Check if collection exists
         response = client.get(f"{qdrant_url}/collections/{collection_name}")
-
+
         if response.status_code == 404:
             logger.info(f"Creating collection '{collection_name}'...")
             create_payload = {
@@ -84,24 +81,23 @@ def index_embeddings_to_qdrant(
                 "optimizers_config": {"default_segment_number": 2},
                 "replication_factor": 1,
             }
-
+
             response = client.put(
-                f"{qdrant_url}/collections/{collection_name}",
-                json=create_payload
+                f"{qdrant_url}/collections/{collection_name}", json=create_payload
             )
-
+
             if response.status_code not in [200, 201]:
                 raise RuntimeError(f"Failed to create collection: {response.text}")
-
+
             logger.info(f"Created collection '{collection_name}'")
         else:
             logger.info(f"Collection '{collection_name}' already exists")
-
+
         # Prepare points
         points = []
         for chunk in chunks_data:
             point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk["chunk_id"]))
-
+
             payload = {
                 "chunk_id": chunk["chunk_id"],
                 "document_hash": chunk["document_hash"],
@@ -113,38 +109,38 @@ def index_embeddings_to_qdrant(
                 "embedding_model": chunk["embedding_model"],
                 "vector_dimensions": chunk["vector_dimensions"],
                 "document_url": chunk["metadata"].get("source", "test_document"),
-                "dataset_collection": chunk["metadata"].get("dataset_collection", "test_collection"),
+                "dataset_collection": chunk["metadata"].get(
+                    "dataset_collection", "test_collection"
+                ),
                 "processing_timestamp": datetime.now().isoformat(),
                 "tokens_count": chunk["tokens_count"],
                 **chunk["metadata"],
             }
-
-            points.append({
-                "id": point_id,
-                "vector": chunk["embedding"],
-                "payload": payload
-            })
-
+
+            points.append(
+                {"id": point_id, "vector": chunk["embedding"], "payload": payload}
+            )
+
         # Upsert points in batches
         batch_size = 100
         for i in range(0, len(points), batch_size):
             batch = points[i : i + batch_size]
             upsert_payload = {"points": batch}
-
+
             response = client.put(
                 f"{qdrant_url}/collections/{collection_name}/points",
                 json=upsert_payload,
             )
-
+
             if response.status_code not in [200, 201]:
                 raise RuntimeError(f"Failed to upsert points: {response.text}")
-
+
             logger.info(f"Indexed batch {i // batch_size + 1} ({len(batch)} points)")
-
+
         client.close()
-
+
         logger.info(f" Successfully indexed {len(points)} chunks into Qdrant")
-
+
     except Exception as e:
         logger.error(f"Failed to index to Qdrant: {e}")
         raise
@@ -171,4 +167,4 @@ def _determine_collection_from_model(model_name: str) -> str:
     logger.warning(
         f"Unknown model {model_name}, defaulting to contextual_chunks_azure"
     )
-    return "contextual_chunks_azure"
\ No newline at end of file
+    return "contextual_chunks_azure"
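Because the loader derives point IDs with uuid.uuid5 over chunk_id, re-running it upserts the same points instead of duplicating them. A quick post-indexing sanity check against Qdrant's standard REST count endpoint; the URL and collection name below are examples from the eval stack, not fixed values:

    import httpx

    def count_points(qdrant_url: str, collection_name: str) -> int:
        """Ask Qdrant how many points the collection holds after indexing."""
        with httpx.Client(timeout=30.0) as client:
            response = client.post(
                f"{qdrant_url}/collections/{collection_name}/points/count",
                json={"exact": True},
            )
            response.raise_for_status()
            return response.json()["result"]["count"]

    # Example: local eval stack from docker-compose-eval.yml, default collection name.
    print(count_points("http://localhost:6333", "contextual_chunks_azure"))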