From 5b1cb445c2a6b950db73bfb2f6a4a61026d6a96e Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 06:46:09 +0530 Subject: [PATCH 01/10] testing --- .github/workflows/deepeval-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deepeval-tests.yml b/.github/workflows/deepeval-tests.yml index 7c33ad8..42cc772 100644 --- a/.github/workflows/deepeval-tests.yml +++ b/.github/workflows/deepeval-tests.yml @@ -3,7 +3,7 @@ name: DeepEval RAG System Tests on: pull_request: types: [opened, synchronize, reopened] - branches: ["rag-33-debug", "RAG-33-31okt", "wip_3_12"] + branches: ["RAG-217-debug"] paths: - 'src/**' - 'tests/**' @@ -94,7 +94,7 @@ jobs: 2. Add the missing secrets with the appropriate values: **Azure OpenAI Configuration:** - - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`) + - \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI endpoint (e.g., \`https://your-resource.openai.azure.com/\`) - \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key - \`AZURE_OPENAI_DEPLOYMENT\` - Chat model deployment name (e.g., \`gpt-4o-mini\`) - \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`) From 29d0fcb04a80895d8bd5b9e956507fd9605174ae Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 06:47:34 +0530 Subject: [PATCH 02/10] fixed formatting issues --- src/llm_orchestration_service.py | 12 ++-- src/llm_orchestration_service_api.py | 3 +- src/models/request_models.py | 2 +- tests/conftest.py | 49 ++++++++------- tests/deepeval_tests/report_generator.py | 5 +- tests/deepeval_tests/standard_tests.py | 33 ++++++---- tests/helpers/test_data_loader.py | 76 +++++++++++------------- 7 files changed, 91 insertions(+), 89 deletions(-) diff --git a/src/llm_orchestration_service.py b/src/llm_orchestration_service.py index abef9a1..931bdd0 100644 --- a/src/llm_orchestration_service.py +++ b/src/llm_orchestration_service.py @@ -2192,7 +2192,7 @@ def _generate_rag_response( }, output=answer, ) - + retrieval_context: List[Dict[str, Any]] | None = None if eval_mode and relevant_chunks: max_blocks_used = ResponseGenerationConstants.DEFAULT_MAX_BLOCKS @@ -2230,16 +2230,16 @@ def _generate_rag_response( chunks=None, # No chunks when question is out of scope ) else: - response = OrchestrationResponse( + response = OrchestrationResponse( chatId=request.chatId, llmServiceActive=True, # service OK; insufficient context questionOutOfLLMScope=True, inputGuardFailed=False, content=localized_msg, ) - if eval_mode: + if eval_mode: response.retrieval_context = retrieval_context - return response + return response # In-scope: return the answer as-is (NO citations) logger.info("Returning in-scope answer without citations.") @@ -2264,14 +2264,14 @@ def _generate_rag_response( chunks=self._format_chunks_for_test_response(relevant_chunks), ) else: - response = OrchestrationResponse( + response = OrchestrationResponse( chatId=request.chatId, llmServiceActive=True, questionOutOfLLMScope=False, inputGuardFailed=False, content=content_with_refs, ) - if eval_mode: + if eval_mode: response.retrieval_context = retrieval_context return response diff --git a/src/llm_orchestration_service_api.py b/src/llm_orchestration_service_api.py index 2f7edee..c171763 100644 --- a/src/llm_orchestration_service_api.py +++ b/src/llm_orchestration_service_api.py @@ -1,4 +1,5 @@ """LLM Orchestration Service API - FastAPI application.""" + import os from contextlib import asynccontextmanager from typing import Any, AsyncGenerator, Dict @@ -40,7 +41,7 @@ ContextGenerationRequest, ContextGenerationResponse, EmbeddingErrorResponse, - DeepEvalTestOrchestrationResponse + DeepEvalTestOrchestrationResponse, ) diff --git a/src/models/request_models.py b/src/models/request_models.py index 539d708..82a64b9 100644 --- a/src/models/request_models.py +++ b/src/models/request_models.py @@ -278,4 +278,4 @@ class DeepEvalTestOrchestrationResponse(BaseModel): content: str retrieval_context: Optional[List[Dict[str, Any]]] = None refined_questions: Optional[List[str]] = None - expected_output: Optional[str] = None # For DeepEval \ No newline at end of file + expected_output: Optional[str] = None # For DeepEval diff --git a/tests/conftest.py b/tests/conftest.py index b67d114..8bbf38e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,14 +16,11 @@ def download_embeddings_from_azure( - connection_string: str, - container_name: str, - blob_name: str, - local_path: Path + connection_string: str, container_name: str, blob_name: str, local_path: Path ) -> None: """ Download pre-computed embeddings from Azure Blob Storage. - + Args: connection_string: Azure Storage connection string container_name: Name of the blob container @@ -34,28 +31,29 @@ def download_embeddings_from_azure( logger.info(f" Container: {container_name}") logger.info(f" Blob: {blob_name}") logger.info(f" Local path: {local_path}") - + try: # Create BlobServiceClient - blob_service_client = BlobServiceClient.from_connection_string(connection_string) - + blob_service_client = BlobServiceClient.from_connection_string( + connection_string + ) + # Get blob client blob_client = blob_service_client.get_blob_client( - container=container_name, - blob=blob_name + container=container_name, blob=blob_name ) - + # Ensure parent directory exists local_path.parent.mkdir(parents=True, exist_ok=True) - + # Download the blob with open(local_path, "wb") as download_file: download_stream = blob_client.download_blob() download_file.write(download_stream.readall()) - + file_size_kb = local_path.stat().st_size / 1024 logger.info(f"✓ Downloaded embeddings successfully ({file_size_kb:.2f} KB)") - + except Exception as e: logger.error(f"Failed to download embeddings from Azure: {e}") raise @@ -144,10 +142,10 @@ def start(self) -> None: """Start all test containers and bootstrap Vault""" logger.info("Starting RAG Stack testcontainers...") os.environ["EVAL_MODE"] = "true" - + # Download embeddings from Azure before starting containers self._download_embeddings_from_azure() - + # Prepare Vault Agent directories agent_in = self.project_root / "test-vault" / "agents" / "llm" agent_out = self.project_root / "test-vault" / "agent-out" @@ -212,15 +210,15 @@ def _download_embeddings_from_azure(self) -> None: connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING") container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME", "test-embeddings") blob_name = os.getenv("AZURE_STORAGE_BLOB_NAME", "test_embeddings.json") - + # Local path where embeddings should be saved embeddings_file = self.project_root / "tests" / "data" / "test_embeddings.json" - + # Skip if embeddings already exist locally (for local development) - #if embeddings_file.exists(): + # if embeddings_file.exists(): # logger.info("Embeddings file already exists locally, skipping Azure download") # return - + # Require Azure configuration for CI/CD if not connection_string: raise ValueError( @@ -228,17 +226,17 @@ def _download_embeddings_from_azure(self) -> None: "Either set this environment variable or ensure test_embeddings.json " f"exists at {embeddings_file}" ) - + logger.info("=" * 80) logger.info("DOWNLOADING EMBEDDINGS FROM AZURE BLOB STORAGE") logger.info("=" * 80) - + try: download_embeddings_from_azure( connection_string=connection_string, container_name=container_name, blob_name=blob_name, - local_path=embeddings_file + local_path=embeddings_file, ) logger.info("Embeddings download complete") except Exception as e: @@ -797,8 +795,9 @@ def orchestration_client(rag_stack: RAGStackTestContainers): Function-scoped fixture that provides the orchestration service URL. Tests can use either requests (sync) or httpx (async). """ + class OrchestrationClient: def __init__(self, base_url: str): self.base_url = base_url - - return OrchestrationClient(rag_stack.get_orchestration_service_url()) \ No newline at end of file + + return OrchestrationClient(rag_stack.get_orchestration_service_url()) diff --git a/tests/deepeval_tests/report_generator.py b/tests/deepeval_tests/report_generator.py index 0db45ac..398d184 100644 --- a/tests/deepeval_tests/report_generator.py +++ b/tests/deepeval_tests/report_generator.py @@ -161,13 +161,10 @@ def generate_failure_analysis(results: Dict[str, Any]) -> str: if len(failure["input"]) > 50 else failure["input"] ) - reason_preview = ( - failure["reason"] ) + reason_preview = failure["reason"] analysis += f"| {failure['test_case']} | {query_preview} | {failure['metric']} | {failure['score']:.2f} | {reason_preview} |\n" - - analysis += "\n" return analysis diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py index 19d4b33..d598e90 100644 --- a/tests/deepeval_tests/standard_tests.py +++ b/tests/deepeval_tests/standard_tests.py @@ -113,6 +113,7 @@ def save_results_fixture(): import httpx # Replace requests with httpx import asyncio + class TestRAGSystem: """Test suite for RAG system evaluation using DeepEval metrics via API.""" @@ -177,7 +178,10 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client except httpx.RequestError as e: result = {"content": f"API Error: {str(e)}", "retrieval_context": []} except Exception as e: - result = {"content": f"Unexpected error: {str(e)}", "retrieval_context": []} + result = { + "content": f"Unexpected error: {str(e)}", + "retrieval_context": [], + } if result is None: result = {"content": "No response received", "retrieval_context": []} # --- DEBUG LOGGING --- @@ -186,20 +190,23 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client print("=" * 80) print(f"Response keys: {list(result.keys())}") for key, value in result.items(): - print(key, value) print(f"Content length: {len(result.get('content', ''))}") print(f"Retrieval context: {len(result.get('retrieval_context', []))} chunks") - - if result.get('retrieval_context'): - for chunk in result['retrieval_context']: + + if result.get("retrieval_context"): + for chunk in result["retrieval_context"]: print(chunk.keys()) - context = chunk.get('content', '') if isinstance(chunk, dict) else str(chunk) - meta = chunk.get('metadata', {}) if isinstance(chunk, dict) else {} - fused_score = meta.get('fused_score', 'N/A') - bm25_score = meta.get('bm25_score', 'N/A') - semantic_score = meta.get('semantic_score', 'N/A') - print(f"Chunk (fused: {fused_score}, bm25: {bm25_score}, semantic: {semantic_score}):\n {context}\n\n") + context = ( + chunk.get("content", "") if isinstance(chunk, dict) else str(chunk) + ) + meta = chunk.get("metadata", {}) if isinstance(chunk, dict) else {} + fused_score = meta.get("fused_score", "N/A") + bm25_score = meta.get("bm25_score", "N/A") + semantic_score = meta.get("semantic_score", "N/A") + print( + f"Chunk (fused: {fused_score}, bm25: {bm25_score}, semantic: {semantic_score}):\n {context}\n\n" + ) else: print("WARNING: No retrieval context returned!") print("=" * 80) @@ -262,4 +269,6 @@ async def run_metric(metric_name, metric): # --- Assert --- failed = [name for name, res in metrics_results.items() if not res["passed"]] if failed: - pytest.fail(f"Metrics failed: {', '.join(failed)} for input: {test_item['input'][:50]}") + pytest.fail( + f"Metrics failed: {', '.join(failed)} for input: {test_item['input'][:50]}" + ) diff --git a/tests/helpers/test_data_loader.py b/tests/helpers/test_data_loader.py index 8296798..f95c203 100644 --- a/tests/helpers/test_data_loader.py +++ b/tests/helpers/test_data_loader.py @@ -1,4 +1,5 @@ """Helper module to load test data into Qdrant before running tests.""" + import json import uuid from typing import List, Dict, Any, Tuple @@ -17,63 +18,59 @@ def load_test_data_into_qdrant( # Load pre-computed embeddings embeddings_file = Path(__file__).parent.parent / "data" / "test_embeddings.json" - + if not embeddings_file.exists(): raise FileNotFoundError( f"Pre-computed embeddings not found at {embeddings_file}. " "Run create_embeddings.py first!" ) - + logger.info(f"Loading pre-computed embeddings from {embeddings_file}") chunks_data, model_used = load_precomputed_embeddings(embeddings_file) - + # Index into Qdrant index_embeddings_to_qdrant( - qdrant_url=qdrant_url, - chunks_data=chunks_data, - model_used=model_used + qdrant_url=qdrant_url, chunks_data=chunks_data, model_used=model_used ) def load_precomputed_embeddings( - embeddings_file: Path + embeddings_file: Path, ) -> Tuple[List[Dict[str, Any]], str]: """Load pre-computed embeddings from file.""" with open(embeddings_file, "r", encoding="utf-8") as f: data = json.load(f) - + chunks_data = data["chunks"] model_used = data["model_used"] - + logger.info(f"Loaded {len(chunks_data)} pre-computed chunks") logger.info(f" Vector size: {data['vector_size']}") logger.info(f" Model: {model_used}") logger.info(f" Documents: {data['total_documents']}") - + return chunks_data, model_used def index_embeddings_to_qdrant( - qdrant_url: str, - chunks_data: List[Dict[str, Any]], - model_used: str + qdrant_url: str, chunks_data: List[Dict[str, Any]], model_used: str ) -> None: """Index embeddings into Qdrant.""" if not chunks_data: logger.warning("No chunks to index") return - + vector_size = chunks_data[0]["vector_dimensions"] collection_name = _determine_collection_from_model(model_used) - + logger.info(f"Indexing into Qdrant collection: {collection_name}") - + client = httpx.Client(timeout=30.0) - + try: # Check if collection exists response = client.get(f"{qdrant_url}/collections/{collection_name}") - + if response.status_code == 404: logger.info(f"Creating collection '{collection_name}'...") create_payload = { @@ -84,24 +81,23 @@ def index_embeddings_to_qdrant( "optimizers_config": {"default_segment_number": 2}, "replication_factor": 1, } - + response = client.put( - f"{qdrant_url}/collections/{collection_name}", - json=create_payload + f"{qdrant_url}/collections/{collection_name}", json=create_payload ) - + if response.status_code not in [200, 201]: raise RuntimeError(f"Failed to create collection: {response.text}") - + logger.info(f"Created collection '{collection_name}'") else: logger.info(f"Collection '{collection_name}' already exists") - + # Prepare points points = [] for chunk in chunks_data: point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk["chunk_id"])) - + payload = { "chunk_id": chunk["chunk_id"], "document_hash": chunk["document_hash"], @@ -113,38 +109,38 @@ def index_embeddings_to_qdrant( "embedding_model": chunk["embedding_model"], "vector_dimensions": chunk["vector_dimensions"], "document_url": chunk["metadata"].get("source", "test_document"), - "dataset_collection": chunk["metadata"].get("dataset_collection", "test_collection"), + "dataset_collection": chunk["metadata"].get( + "dataset_collection", "test_collection" + ), "processing_timestamp": datetime.now().isoformat(), "tokens_count": chunk["tokens_count"], **chunk["metadata"], } - - points.append({ - "id": point_id, - "vector": chunk["embedding"], - "payload": payload - }) - + + points.append( + {"id": point_id, "vector": chunk["embedding"], "payload": payload} + ) + # Upsert points in batches batch_size = 100 for i in range(0, len(points), batch_size): batch = points[i : i + batch_size] upsert_payload = {"points": batch} - + response = client.put( f"{qdrant_url}/collections/{collection_name}/points", json=upsert_payload, ) - + if response.status_code not in [200, 201]: raise RuntimeError(f"Failed to upsert points: {response.text}") - + logger.info(f"Indexed batch {i // batch_size + 1} ({len(batch)} points)") - + client.close() - + logger.info(f" Successfully indexed {len(points)} chunks into Qdrant") - + except Exception as e: logger.error(f"Failed to index to Qdrant: {e}") raise @@ -171,4 +167,4 @@ def _determine_collection_from_model(model_name: str) -> str: logger.warning( f"Unknown model {model_name}, defaulting to contextual_chunks_azure" ) - return "contextual_chunks_azure" \ No newline at end of file + return "contextual_chunks_azure" From f2ad52c9928618a836d2ad3613ca0645e65af837 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 07:45:12 +0530 Subject: [PATCH 03/10] fixed one issue --- tests/deepeval_tests/standard_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py index d598e90..7bd81f9 100644 --- a/tests/deepeval_tests/standard_tests.py +++ b/tests/deepeval_tests/standard_tests.py @@ -192,7 +192,7 @@ async def test_all_metrics(self, test_item: Dict[str, Any], orchestration_client for key, value in result.items(): print(key, value) print(f"Content length: {len(result.get('content', ''))}") - print(f"Retrieval context: {len(result.get('retrieval_context', []))} chunks") + print(f"Retrieval context: {len(result.get('retrieval_context') or [])} chunks") if result.get("retrieval_context"): for chunk in result["retrieval_context"]: From ed60c34962ba41bfc5e12e3870138e9285404013 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 08:58:33 +0530 Subject: [PATCH 04/10] fix rate limiting issue --- tests/deepeval_tests/standard_tests.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py index 7bd81f9..8a8f2b8 100644 --- a/tests/deepeval_tests/standard_tests.py +++ b/tests/deepeval_tests/standard_tests.py @@ -249,10 +249,12 @@ async def run_metric(metric_name, metric): "reason": f"Error: {str(e)}", } - metric_results_list = await asyncio.gather( - *(run_metric(name, metric) for name, metric in metrics) - ) - metrics_results = dict(metric_results_list) + # Run metrics sequentially to avoid rate limiting + metrics_results = {} + for name, metric in metrics: + print(f" Running {name} metric...") + result_name, result_data = await run_metric(name, metric) + metrics_results[result_name] = result_data # --- Collect results --- try: From 0bfc810c521a202637a657ed7d43dc0c33d97844 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 09:20:06 +0530 Subject: [PATCH 05/10] fixing rate limiting issue --- .github/workflows/deepeval-tests.yml | 4 ++-- tests/deepeval_tests/standard_tests.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deepeval-tests.yml b/.github/workflows/deepeval-tests.yml index 42cc772..f26d741 100644 --- a/.github/workflows/deepeval-tests.yml +++ b/.github/workflows/deepeval-tests.yml @@ -193,8 +193,8 @@ jobs: AZURE_STORAGE_BLOB_NAME: ${{ secrets.AZURE_STORAGE_BLOB_NAME }} EVAL_MODE: "true" run: | - # Run tests with testcontainers managing Docker Compose - uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO + # Run tests sequentially (one at a time) to avoid rate limiting + uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO -n 0 - name: Fix permissions on test artifacts if: always() diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py index 8a8f2b8..fdebfa0 100644 --- a/tests/deepeval_tests/standard_tests.py +++ b/tests/deepeval_tests/standard_tests.py @@ -249,12 +249,15 @@ async def run_metric(metric_name, metric): "reason": f"Error: {str(e)}", } - # Run metrics sequentially to avoid rate limiting + # Run metrics sequentially with delays to avoid rate limiting metrics_results = {} - for name, metric in metrics: + for i, (name, metric) in enumerate(metrics): print(f" Running {name} metric...") result_name, result_data = await run_metric(name, metric) metrics_results[result_name] = result_data + # Add delay between metrics to respect rate limits (except after last metric) + if i < len(metrics) - 1: + await asyncio.sleep(10) # 10 second delay for Azure S0 tier rate limits # --- Collect results --- try: From 6d1658268e161f9f629f219ac56117f40ac1c6b5 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 09:27:15 +0530 Subject: [PATCH 06/10] fix rate limiting issue --- tests/deepeval_tests/standard_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/deepeval_tests/standard_tests.py b/tests/deepeval_tests/standard_tests.py index fdebfa0..fa542e2 100644 --- a/tests/deepeval_tests/standard_tests.py +++ b/tests/deepeval_tests/standard_tests.py @@ -257,7 +257,7 @@ async def run_metric(metric_name, metric): metrics_results[result_name] = result_data # Add delay between metrics to respect rate limits (except after last metric) if i < len(metrics) - 1: - await asyncio.sleep(10) # 10 second delay for Azure S0 tier rate limits + await asyncio.sleep(15) # 15 second delay for Azure S0 tier rate limits # --- Collect results --- try: From 5db146481a23afa9c83ba0a191ecf8881bdbbb59 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 09:36:03 +0530 Subject: [PATCH 07/10] reduce test cases --- tests/data/test_dataset.json | 96 ------------------------------------ 1 file changed, 96 deletions(-) diff --git a/tests/data/test_dataset.json b/tests/data/test_dataset.json index 0b09b6e..f1f9c7e 100644 --- a/tests/data/test_dataset.json +++ b/tests/data/test_dataset.json @@ -16,101 +16,5 @@ "expected_output": "Kui sinu ID-kaart on kadunud, tuleks tegutseda järgmiselt: Mõtle läbi, kas kaart võib olla turvalises kohas – näiteks kontorilaual või mujal, kus seda hiljuti kasutasid. Peata ID-kaardiga seotud sertifikaadid, helistades numbril +372 677 3377. Sertifikaatide peatamine tähendab, et kaarti ei saa kasutada elektroonilistes kanalites, kuid see kehtib endiselt füüsilise isikut tõendava dokumendina (nt reisidokumendina). Kui oled kindel, et kaart on varastatud, teavita politseid ja lase ID-kaart kehtetuks tunnistada. Kui kaardi kadumine toimub välismaal, saad Eesti välisesindusest taotleda ajutise tagasipöördumise tunnistuse. Kui vajad täiendavat abi, võid pöörduda Politsei- ja Piirivalveameti teenindusse.", "category": "digital_identity_security", "language": "et" - }, - { - "input": "Millised mobiilioperaatorid pakuvad Mobiil-ID-d Eestis?", - "expected_output": "Mobiil-ID SIM-kaarte Eestis väljustavad kolm mobiilioperaatorit: Telia, Elisa ja Tele2. Mobiil-ID SIM-kaarte saab taotleda nende operaatorite esindustes. Igal operaatoril on Mobiil-ID väljastamiseks oma reeglid, näiteks vanusepiirangud, teenusetasud ja kasutustingimused, kuid riiklikud nõuded on kõigile ühesugused. Kõik operaatorid peavad järgima samu riiklikke nõudeid, tagades Mobiil-ID ohutuse ja ühesuguse toimimise sõltumata kasutatavast operaatorist.", - "category": "digital_identity", - "language": "et" - }, - { - "input": "Kuidas taotleda uut ID-kaardi koodiümbrikku?", - "expected_output": "Uue koodiümbriku taotlemiseks tuleb pöörduda Politsei- ja Piirivalveameti teenindusse või Eesti Vabariigi välisesindusse. Kaasa tuleb võtta isikut tõendav dokument. Võimalusel valida mobiilne pilet enne teeninduse külastamist. Maksa riigilõiv, mis rakendub koodiümbriku väljastamisel. Riigilõivu määrad leiad Politsei- ja Piirivalveameti kodulehelt. ui sinu ID-kaart on väljastatud pärast 17.11.2025, saad PUK-koodi vaadata ka Politsei- ja Piirivalveameti iseteenindusest, valides teenuse “ID-kaardi PUK-i vaatamine“", - "category": "id_card_usage", - "language": "et" - }, - { - "input": "Mis on eIDAS määrus?", - "expected_output": "eIDAS määrus (electronic IDentification, Authentication and trust Services) on Euroopa Liidus kehtiv e-identimise ja e-tehingute määrus, mille eesmärk on lihtsustada piiriülest elektrooniliste teenuste tarbimist ühtsustatud standardite ja tegutsemispõhimõtete kaudu. Määrus võeti vastu 23. juulil 2014 ja alates 1. juulist 2016 peavad Euroopa Liidu riigid tunnustama teineteise e-allkirju ning Eesti kodanike digiallkirju peab aktsepteerima ka teiste EL liikmesriikide avaliku sektori asutused.", - "category": "digital_identity", - "language": "et" - }, - { - "input": "Why am I getting an error when trying to sign documents in DigiDoc4?", - "expected_output": "This error occurs when your computer's clock time differs from the validation confirmation time. The system needs synchronized time to verify digital signatures properly.To fix this: Windows: Click the Start menu (Windows logo) in the bottom left corner, type \"Control Panel\" and open it. Then go to \"Date and Time\" settings and ensure your date, time, and time zone are correct. macOS: Open Spotlight Search and type \"System Settings\". Select \"General\", then adjust settings in \"Language & Region\" and \"Date & Time\". Ubuntu: Click \"Show applications\" icon in the bottom left corner, search for \"Date & time\" and open it. Before making changes, disable automatic configuration first. If the error persists: Disconnect any active VPN connection, restart your device, and disable antivirus/security software temporarily. If problems continue, contact ID support through their web form.", - "category": "digital_identity", - "language": "en" - }, - { - "input": "What is an EORI number and do I need one?", - "expected_output": "An EORI (Economic Operators Registration and Identification) number is mandatory for businesses engaged in foreign trade when importing goods, transit shipments, exporting, or performing other customs operations. If you're doing any cross-border trade within or outside the EU, you need an EORI number. To get one: In Estonia, the Tax and Customs Board issues EORI numbers. You can apply electronically through the e-MTA service portal at https://maasikas.emta.ee/login/?lang=et. After logging in, select \"Customs\" → \"Rights and obligations\" → \"Economic Operators Registration and Identification number (EORI)\". You need access rights to submit an application - see the access rights section for details. If you don't have access to e-MTA, you can submit your application at a Tax and Customs Board service location.", - "category": "emta", - "language": "en" - }, - { - "input": "How do I check if a business partner is trustworthy before doing business with them?", - "expected_output": "You should verify your business partner's background through multiple public databases and sources. This helps you avoid tax fraud chains and ensure your partner can fulfill their obligations. Here's how to check them: Essential checks: VAT registration: Check if they have a valid Estonian VAT number (KMKR) at https://apps.emta.ee/saqu/public/kmkrnr?lang=et. Without a valid VAT number, the seller cannot add VAT to transactions and the buyer cannot deduct input VAT. EU VAT number: For EU partners, verify their VAT number at https://ec.europa.eu/taxation_customs/vies/#/vat-validation. Check that the name matches the VAT number. Tax debt: Check for tax debts at https://apps.emta.ee/saqu/public/taxdebt?lang=et. Tax debt may indicate payment difficulties or problems meeting deadlines. Public data query: Get comprehensive company information at https://maasikas.emta.ee/rating/search including address, declared turnover, paid taxes, number of employees, outstanding declarations, and business bans. Business Register: Check company details at https://ariregister.rik.ee/ including registration code, legal form, share capital, and annual reports. Additional verification: Request tax data certificates directly through e-MTA - partners can send these showing their labor, salary, turnover, and tax debt data Check paid taxes tables to see how much they've paid in taxes and their declared turnover Search online for articles, forum discussions, payment default registers, and court decisions about the partner Check the Economic Activities Register (MTR) at https://mtr.ttja.ee/ for licenses and permits Use credit info databases like Inforegister, Creditinfo Estonia, or Äripäeva Infopank for credit ratings and payment behavior", - "category": "emta", - "language": "en" - }, - { - "input": "Kas ma saan kontrollida, kas minu äripartneril on karistusregister?", - "expected_output": "Jah, saate kontrollida karistusregistrit aadressil http://www.rik.ee/et/karistusregister 4 euro eest. Register sisaldab andmeid karistatud isikute (sh juriidiliste isikute) ja nende karistuste kohta. Kui registrist nähtub varasemaid maksualaseid süütegusid, on see riskitegur, et nendega äri tehes võite sattuda maksupettuse ahelasse. Korduvad rikkumised võivad viidata madalale seaduskuulekusele ja muuta tehingud riskantseks.", - "category": "emta", - "language": "et" - }, - { - "input": "Как проверить, заслуживает ли деловой партнер доверия, перед началом сотрудничества?", - "expected_output": "Вам следует проверить информацию о вашем деловом партнере через несколько публичных баз данных и источников. Это поможет избежать цепочек налогового мошенничества и убедиться, что партнер может выполнить свои обязательства. Как их проверить: Основные проверки: Регистрация НДС: Проверьте, есть ли у них действительный эстонский номер плательщика НДС (KMKR) на сайте https://apps.emta.ee/saqu/public/kmkrnr?lang=et. Без действительного номера плательщика НДС продавец не имеет права добавлять НДС к сделке, а покупатель не может вычесть входящий НДС. Номер НДС ЕС: Для партнеров из ЕС проверьте номер НДС на сайте https://ec.europa.eu/taxation_customs/vies/#/vat-validation. Убедитесь, что название соответствует номеру НДС. Налоговая задолженность: Проверьте наличие налоговых долгов на сайте https://apps.emta.ee/saqu/public/taxdebt?lang=et. Налоговая задолженность может указывать на платежные трудности или проблемы с соблюдением сроков. Запрос публичных данных: Получите подробную информацию о компании на сайте https://maasikas.emta.ee/rating/search, включая адрес, декларированный оборот, уплаченные налоги, количество сотрудников, непредставленные декларации и запреты на деятельность. Коммерческий регистр: Проверьте данные компании на сайте https://ariregister.rik.ee/, включая регистрационный код, юридическую форму, уставный капитал и годовые отчеты. Дополнительная проверка: Запросите справки о налоговых данных напрямую через e-MTA - партнеры могут отправлять их, показывая данные о рабочей силе, зарплате, обороте и налоговой задолженности Проверьте таблицы уплаченных налогов, чтобы увидеть, сколько они заплатили налогов и декларировали оборот Поищите в интернете статьи, темы обсуждений на форумах, результаты регистра платежных нарушений и судебные решения о партнере Проверьте Регистр экономической деятельности (MTR) на сайте https://mtr.ttja.ee/ на наличие лицензий и разрешений Используйте базы данных кредитной информации, такие как Inforegister, Creditinfo Estonia или Äripäeva Infopank для кредитных рейтингов и платежного поведения", - "category": "emta", - "language": "ru" - }, - { - "input": "Kus saab ID-kaarti isikut tõendava dokumendina kasutada?", - "expected_output": "ID-kaarti saab kasutada isikut tõendava dokumendina, digitaalsete teenuste kasutamiseks, nagu internetipank ja e-maksuamet, samuti digitaalse allkirja andmiseks ja dokumentide krüpteerimiseks. Samuti on võimalik seda kasutada Euroopa Liidu ja Euroopa Majanduspiirkonna piires reisidokumendina.", - "category": "id_card_usage", - "language": "et" - }, - { - "input": "Kuidas aktiveerida Mobiil-ID?", - "expected_output": "Mobiil-ID aktiveerimine toimub operaatorite iseteeninduses (Telia, Elisa, Tele2).", - "category": "digital_identity", - "language": "et" - }, - { - "input": "Mis on Eesti sotsiaaluuring ja miks ma peaksin osalema?", - "expected_output": "Eesti sotsiaaluuring aitab hinnata leibkondade ja isikute sissetulekute jaotust, elamistingimusi ning sotsiaalset tõrjutust. Samasugust uuringut korraldatakse kõikides Euroopa Liidu riikides. Teie osalemine aitab kujundada Eesti sotsiaalpoliitikat ja arendada teenuseid. Miks see on oluline: Need andmed aitavad riigijuhtidel, teadlastel ja arvamusliidridel mõista inimeste toimetulekut, vaesust, ilmajäetust, sissetulekut, elamistingimusi ja arstiabi kättesaadavust. Iga uuringusse sattunud inimene esindab umbes sadat Eesti elanikku, seega on teie vastused olulised. Kuidas osaleda: Kui teid valitakse, saate teavituskirja e-postile (rahvastikuregistris märgitud aadressile) või postiga. Saate küsimustikule vastata veebis aadressil https://estat.stat.ee, logides sisse ID-kaardi, mobiil-ID või Smart-ID-ga, või osaleda telefoniintervjuus. Uuring koosneb kahest osast: leibkonnaküsimustik (vastab põhiisik) ja isikuküsimustik (võib vastata iga vähemalt 15-aastane leibkonnaliige). Osalemine on vabatahtlik, kuid Statistikaamet kaitseb kõigi vastajate andmeid riikliku statistika seaduse ja isikuandmete kaitse seaduse alusel. Iga leibkonda küsitletakse neli aastat järjest 12 kuu tagant.", - "category": "statistics", - "language": "et" - }, - { - "input": "Kas ma saan kodus elektritöid ise teha või vajan spetsialisti?", - "expected_output": "Enamik elektritöid nõuab ohutuse huvides kvalifitseeritud spetsialisti. Valesti tehtud elektritöö võib põhjustada elektrilöögi ning tulekahjuohtu. Siiski saate lihtsamaid töid ise teha, kui teil on vastavad teadmised. Mida VÕITE ise teha: Vahetada lüliteid, pistikupesi, lambipesi ja kaitsmeid (kuid MITTE paigaldada uusi) Parandada ja asendada juhtmelüliteid, lambipesi, pikendusjuhtmeid ja juhtmepistikuid Milleks PEATE palkama spetsialisti: Uute elektripaigaldiste ehitamine Uute pistikupesade ja lülitite paigaldamine Kohtkindlate kodumasinate ühendamine ja lahti ühendamine Kaitsekontaktita (maandamata) pistikupesade vahetamine kaitsekontaktiga (maandatud) pistikupesade vastu Elektritöö ettevõtjad peavad olema esitanud majandustegevuse registrisse majandustegevuseteatise ning neil peab olema tööde eest vastutav kompetentne elektritöö juht.", - "category": "ttja", - "language": "et" - }, - { - "input": "What is an electrical installation audit and when do I need one?", - "expected_output": "An electrical installation audit checks whether your electrical system meets safety requirements and is safe to use. During the audit, the auditor visually assesses the installation's condition, reviews documentation and test/measurement results, and performs additional control measurements if necessary. When you need an audit: Before commissioning: Required before putting a new or renovated building's electrical installation into use Periodic audits: Regular checks at intervals depending on the installation type and age. While not mandatory for residential spaces (private houses, apartments, summer cottages), you should still periodically check them to ensure safety and functionality How to get an audit: Only contractors with appropriate accreditation can perform audits Results and documents are digitally formatted in TTJA's information system at https://jvis.ttja.ee, where they're always accessible to the electrical installation owner For residential electrical system checks, contact a competent electrical professional or auditor who will perform necessary operations and provide feedback on the system's condition and safety.", - "category": "ttja", - "language": "en" - }, - { - "input": "Mis on .asice ja .bdoc failide vahe?", - "expected_output": "Mõlemad on BDOC digiallkirjaformaadi alamformaadid. .asice (ASiC-E LT) on BDOC allkiri ajatempliga ja on parima rahvusvahelise ühilduvusega. DigiDoc4 kasutab kõikide uute dokumentide allkirjastamisel automaatselt .asice vormingut. .bdoc formaati kasutati varem, kuid uued allkirjad luuakse .asice formaadis.", - "category": "digital_signature", - "language": "et" - }, - { - "input": "How long is the e-residency digi-ID valid for?", - "expected_output": "The e-residency digi-ID is valid for 5 years", - "category": "digital_identity", - "language": "en" - }, - { - "input": "Предоставляет ли электронное резидентство эстонское гражданство или налоговое резидентство?", - "expected_output": "Нет, электронное резидентство не предоставляет эстонское гражданство или налоговое резидентство.", - "category": "digital_identity", - "language": "ru" } ] \ No newline at end of file From bec1155c1e4349655483a0b4ac496e251d3a1182 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 12:12:48 +0530 Subject: [PATCH 08/10] fixing internal issue in eval flow --- docker-compose-eval.yml | 7 ++++--- test-vault/agents/llm/agent.hcl | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docker-compose-eval.yml b/docker-compose-eval.yml index 2c6aadf..ba4cb12 100644 --- a/docker-compose-eval.yml +++ b/docker-compose-eval.yml @@ -56,7 +56,7 @@ services: - vault volumes: - ./test-vault/agents/llm:/agent/in - - ./test-vault/agent-out:/agent/out + - ./test-vault/agent-out:/agent/llm-token entrypoint: ["sh", "-c"] command: - | @@ -254,12 +254,13 @@ services: - "8100:8100" environment: - VAULT_ADDR=http://vault:8200 - - VAULT_TOKEN_FILE=/agent/out/token + - VAULT_TOKEN_FILE=/agent/llm-token/token + - ENVIRONMENT=development - QDRANT_URL=http://qdrant:6333 - EVAL_MODE=true volumes: - ./src/llm_config_module/config:/app/src/llm_config_module/config:ro - - ./test-vault/agent-out:/agent/out:ro + - ./test-vault/agent-out:/agent/llm-token:ro - test_llm_orchestration_logs:/app/logs depends_on: - qdrant diff --git a/test-vault/agents/llm/agent.hcl b/test-vault/agents/llm/agent.hcl index 9883bfe..69ffde4 100644 --- a/test-vault/agents/llm/agent.hcl +++ b/test-vault/agents/llm/agent.hcl @@ -3,7 +3,7 @@ vault { address = "http://vault:8200" } -pid_file = "/agent/out/pidfile" +pid_file = "/agent/llm-token/pidfile" auto_auth { method "approle" { @@ -17,7 +17,7 @@ auto_auth { sink "file" { config = { - path = "/agent/out/token" + path = "/agent/llm-token/token" } } } @@ -36,7 +36,7 @@ listener "tcp" { # dummy template so cache is “active” (some versions require this) template { source = "/dev/null" - destination = "/agent/out/dummy" + destination = "/agent/llm-token/dummy" } # Disable API proxy; not needed here From e477c06cab3f1aa588cc558d32ae4a44c6fab774 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 14:38:56 +0530 Subject: [PATCH 09/10] fixed issue --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 8bbf38e..90d9c8f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -592,7 +592,7 @@ def _fix_token_file_permissions( "vault-agent-llm", "chmod", "644", - "/agent/out/token", + "/agent/llm-token/token", ], capture_output=True, text=True, From fb11fc9f7826ae8f744cfb10e577fce20269aca8 Mon Sep 17 00:00:00 2001 From: nuwangeek Date: Wed, 21 Jan 2026 17:18:20 +0530 Subject: [PATCH 10/10] added more data --- tests/data/test_dataset.json | 42 +++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tests/data/test_dataset.json b/tests/data/test_dataset.json index f1f9c7e..e94b012 100644 --- a/tests/data/test_dataset.json +++ b/tests/data/test_dataset.json @@ -12,9 +12,45 @@ "language": "et" }, { - "input": "Mida teha, kui mu ID-kaart on kadunud", - "expected_output": "Kui sinu ID-kaart on kadunud, tuleks tegutseda järgmiselt: Mõtle läbi, kas kaart võib olla turvalises kohas – näiteks kontorilaual või mujal, kus seda hiljuti kasutasid. Peata ID-kaardiga seotud sertifikaadid, helistades numbril +372 677 3377. Sertifikaatide peatamine tähendab, et kaarti ei saa kasutada elektroonilistes kanalites, kuid see kehtib endiselt füüsilise isikut tõendava dokumendina (nt reisidokumendina). Kui oled kindel, et kaart on varastatud, teavita politseid ja lase ID-kaart kehtetuks tunnistada. Kui kaardi kadumine toimub välismaal, saad Eesti välisesindusest taotleda ajutise tagasipöördumise tunnistuse. Kui vajad täiendavat abi, võid pöörduda Politsei- ja Piirivalveameti teenindusse.", - "category": "digital_identity_security", + "input": "Mis on eIDAS määrus?", + "expected_output": "eIDAS määrus (electronic IDentification, Authentication and trust Services) on Euroopa Liidus kehtiv e-identimise ja e-tehingute määrus, mille eesmärk on lihtsustada piiriülest elektrooniliste teenuste tarbimist ühtsustatud standardite ja tegutsemispõhimõtete kaudu. Määrus võeti vastu 23. juulil 2014 ja alates 1. juulist 2016 peavad Euroopa Liidu riigid tunnustama teineteise e-allkirju ning Eesti kodanike digiallkirju peab aktsepteerima ka teiste EL liikmesriikide avaliku sektori asutused.", + "category": "digital_identity", + "language": "et" + }, + { + "input": "Why am I getting an error when trying to sign documents in DigiDoc4?", + "expected_output": "This error occurs when your computer's clock time differs from the validation confirmation time. The system needs synchronized time to verify digital signatures properly.To fix this: Windows: Click the Start menu (Windows logo) in the bottom left corner, type \"Control Panel\" and open it. Then go to \"Date and Time\" settings and ensure your date, time, and time zone are correct. macOS: Open Spotlight Search and type \"System Settings\". Select \"General\", then adjust settings in \"Language & Region\" and \"Date & Time\". Ubuntu: Click \"Show applications\" icon in the bottom left corner, search for \"Date & time\" and open it. Before making changes, disable automatic configuration first. If the error persists: Disconnect any active VPN connection, restart your device, and disable antivirus/security software temporarily. If problems continue, contact ID support through their web form.", + "category": "digital_identity", + "language": "en" + }, + { + "input": "Kuidas aktiveerida Mobiil-ID?", + "expected_output": "Mobiil-ID aktiveerimine toimub operaatorite iseteeninduses (Telia, Elisa, Tele2).", + "category": "digital_identity", + "language": "et" + }, + { + "input": "Mis on Eesti sotsiaaluuring ja miks ma peaksin osalema?", + "expected_output": "Eesti sotsiaaluuring aitab hinnata leibkondade ja isikute sissetulekute jaotust, elamistingimusi ning sotsiaalset tõrjutust. Samasugust uuringut korraldatakse kõikides Euroopa Liidu riikides. Teie osalemine aitab kujundada Eesti sotsiaalpoliitikat ja arendada teenuseid. Miks see on oluline: Need andmed aitavad riigijuhtidel, teadlastel ja arvamusliidridel mõista inimeste toimetulekut, vaesust, ilmajäetust, sissetulekut, elamistingimusi ja arstiabi kättesaadavust. Iga uuringusse sattunud inimene esindab umbes sadat Eesti elanikku, seega on teie vastused olulised. Kuidas osaleda: Kui teid valitakse, saate teavituskirja e-postile (rahvastikuregistris märgitud aadressile) või postiga. Saate küsimustikule vastata veebis aadressil https://estat.stat.ee, logides sisse ID-kaardi, mobiil-ID või Smart-ID-ga, või osaleda telefoniintervjuus. Uuring koosneb kahest osast: leibkonnaküsimustik (vastab põhiisik) ja isikuküsimustik (võib vastata iga vähemalt 15-aastane leibkonnaliige). Osalemine on vabatahtlik, kuid Statistikaamet kaitseb kõigi vastajate andmeid riikliku statistika seaduse ja isikuandmete kaitse seaduse alusel. Iga leibkonda küsitletakse neli aastat järjest 12 kuu tagant.", + "category": "statistics", + "language": "et" + }, + { + "input": "Kas ma saan kodus elektritöid ise teha või vajan spetsialisti?", + "expected_output": "Enamik elektritöid nõuab ohutuse huvides kvalifitseeritud spetsialisti. Valesti tehtud elektritöö võib põhjustada elektrilöögi ning tulekahjuohtu. Siiski saate lihtsamaid töid ise teha, kui teil on vastavad teadmised. Mida VÕITE ise teha: Vahetada lüliteid, pistikupesi, lambipesi ja kaitsmeid (kuid MITTE paigaldada uusi) Parandada ja asendada juhtmelüliteid, lambipesi, pikendusjuhtmeid ja juhtmepistikuid Milleks PEATE palkama spetsialisti: Uute elektripaigaldiste ehitamine Uute pistikupesade ja lülitite paigaldamine Kohtkindlate kodumasinate ühendamine ja lahti ühendamine Kaitsekontaktita (maandamata) pistikupesade vahetamine kaitsekontaktiga (maandatud) pistikupesade vastu Elektritöö ettevõtjad peavad olema esitanud majandustegevuse registrisse majandustegevuseteatise ning neil peab olema tööde eest vastutav kompetentne elektritöö juht.", + "category": "ttja", "language": "et" + }, + { + "input": "What is an electrical installation audit and when do I need one?", + "expected_output": "An electrical installation audit checks whether your electrical system meets safety requirements and is safe to use. During the audit, the auditor visually assesses the installation's condition, reviews documentation and test/measurement results, and performs additional control measurements if necessary. When you need an audit: Before commissioning: Required before putting a new or renovated building's electrical installation into use Periodic audits: Regular checks at intervals depending on the installation type and age. While not mandatory for residential spaces (private houses, apartments, summer cottages), you should still periodically check them to ensure safety and functionality How to get an audit: Only contractors with appropriate accreditation can perform audits Results and documents are digitally formatted in TTJA's information system at https://jvis.ttja.ee, where they're always accessible to the electrical installation owner For residential electrical system checks, contact a competent electrical professional or auditor who will perform necessary operations and provide feedback on the system's condition and safety.", + "category": "ttja", + "language": "en" + }, + { + "input": "Предоставляет ли электронное резидентство эстонское гражданство или налоговое резидентство?", + "expected_output": "Нет, электронное резидентство не предоставляет эстонское гражданство или налоговое резидентство.", + "category": "digital_identity", + "language": "ru" } ] \ No newline at end of file