From fdd0043a2a34718b2245b4f3ee4d592334c374cb Mon Sep 17 00:00:00 2001 From: Alex Mikhalev Date: Thu, 26 Mar 2026 13:17:09 +0100 Subject: [PATCH] feat(types): add layered search output (--layer 1/2/3) for token efficiency Add Layer enum (One/Two/Three) to SearchQuery controlling result detail: - Layer 1: title + tags only (~50 tokens/result) - Layer 2: + first paragraph summary (~150 tokens/result) - Layer 3: full content (current default, backwards compatible) CLI flag --layer, HTTP query parameter, and result filtering all wired. Refs #88 Co-Authored-By: Claude Opus 4.6 --- crates/terraphim_agent/src/main.rs | 91 ++++++++-- crates/terraphim_agent/src/repl/handler.rs | 3 +- crates/terraphim_agent/src/service.rs | 3 +- .../tests/cross_mode_consistency_test.rs | 3 +- .../tests/error_handling_test.rs | 7 +- .../terraphim_agent/tests/integration_test.rs | 7 +- .../tests/kg_ranking_integration_test.rs | 3 +- crates/terraphim_cli/src/service.rs | 3 +- crates/terraphim_cli/tests/service_tests.rs | 2 + crates/terraphim_mcp_server/src/lib.rs | 6 +- .../tests/atlassian_ripgrep_integration.rs | 1 + .../tests/clickup_haystack_test.rs | 2 + .../tests/mcp_haystack_test.rs | 1 + .../tests/perplexity_haystack_test.rs | 2 + crates/terraphim_service/src/lib.rs | 6 +- .../logical_operators_fix_validation_test.rs | 10 +- .../logical_operators_integration_test.rs | 11 +- .../tests/ollama_llama_integration_test.rs | 3 +- .../terraphim_graph_lexical_fallback_test.rs | 1 + .../tests/weighted_haystack_ranking_test.rs | 6 +- crates/terraphim_types/src/lib.rs | 169 ++++++++++++++++++ .../src/testing/server_api/fixtures.rs | 3 +- scripts/tests/test_operators_debug.rs | 5 +- .../tests/terraphim_graph_search_test.rs | 4 +- .../tests/tui_desktop_parity_test.rs | 7 +- 25 files changed, 327 insertions(+), 32 deletions(-) diff --git a/crates/terraphim_agent/src/main.rs b/crates/terraphim_agent/src/main.rs index d806ff362..bd03a838d 100644 --- a/crates/terraphim_agent/src/main.rs +++ b/crates/terraphim_agent/src/main.rs @@ -38,7 +38,10 @@ mod repl; use client::{ApiClient, SearchResponse}; use service::TuiService; -use terraphim_types::{Document, LogicalOperator, NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{ + Document, Layer, LogicalOperator, NormalizedTermValue, RoleName, SearchQuery, + extract_first_paragraph, +}; use terraphim_update::{check_for_updates, check_for_updates_startup, update_binary}; #[derive(clap::ValueEnum, Debug, Clone)] @@ -475,6 +478,10 @@ struct SearchDocumentOutput { title: String, url: String, rank: Option, + #[serde(skip_serializing_if = "Option::is_none")] + tags: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + summary: Option, } #[derive(Debug, Serialize)] @@ -485,6 +492,38 @@ struct SearchOutput { results: Vec, } +/// Extension trait to convert Document to layered output +impl SearchDocumentOutput { + fn from_document(doc: &Document, layer: &Layer) -> Self { + match layer { + Layer::One => Self { + id: doc.id.clone(), + title: doc.title.clone(), + url: doc.url.clone(), + rank: doc.rank, + tags: doc.tags.clone(), + summary: None, + }, + Layer::Two => Self { + id: doc.id.clone(), + title: doc.title.clone(), + url: doc.url.clone(), + rank: doc.rank, + tags: doc.tags.clone(), + summary: Some(extract_first_paragraph(&doc.body)), + }, + Layer::Three => Self { + id: doc.id.clone(), + title: doc.title.clone(), + url: doc.url.clone(), + rank: doc.rank, + tags: doc.tags.clone(), + summary: None, // For full content, summary not needed + }, + } + } +} + fn print_json_output(value: &T, mode: CommandOutputMode) -> Result<()> { let out = match mode { CommandOutputMode::Human => serde_json::to_string_pretty(value)?, @@ -540,6 +579,9 @@ enum Command { role: Option, #[arg(long, default_value_t = 10)] limit: usize, + /// Output layer: 1=minimal (title+tags), 2=summary, 3=full (default) + #[arg(long, default_value_t = 3, value_name = "1|2|3")] + layer: u8, }, /// Manage roles (list, select) Roles { @@ -1130,6 +1172,7 @@ async fn run_offline_command( operator, role, limit, + layer, } => { let role_name = if let Some(role) = role { RoleName::new(&role) @@ -1137,6 +1180,10 @@ async fn run_offline_command( service.get_selected_role().await }; + // Parse and validate layer + let layer = + terraphim_types::Layer::from_u8(layer).unwrap_or(terraphim_types::Layer::Three); + let results = if let Some(additional_terms) = terms { // Multi-term query with logical operators let mut all_terms = vec![query.clone()]; @@ -1170,6 +1217,7 @@ async fn run_offline_command( skip: Some(0), limit: Some(limit), role: Some(role_name.clone()), + layer, }; service.search_with_query(&search_query).await? @@ -1182,23 +1230,33 @@ async fn run_offline_command( if output.is_machine_readable() { let payload = SearchOutput { - query, + query: query.clone(), role: role_name.to_string(), count: results.len(), results: results .iter() - .map(|doc| SearchDocumentOutput { - id: doc.id.clone(), - title: doc.title.clone(), - url: doc.url.clone(), - rank: doc.rank, - }) + .map(|doc| SearchDocumentOutput::from_document(doc, &layer)) .collect(), }; print_json_output(&payload, output.mode)?; } else { for doc in results.iter() { - println!("- {}\t{}", doc.rank.unwrap_or_default(), doc.title); + match layer { + Layer::One => { + println!("- {}\t{}", doc.rank.unwrap_or_default(), doc.title); + } + Layer::Two => { + let summary = extract_first_paragraph(&doc.body); + println!("- {}\t{}", doc.rank.unwrap_or_default(), doc.title); + println!(" {}", summary); + } + Layer::Three => { + println!("- {}\t{}", doc.rank.unwrap_or_default(), doc.title); + if let Some(ref tags) = doc.tags { + println!(" Tags: {}", tags.join(", ")); + } + } + } } } Ok(()) @@ -2172,6 +2230,7 @@ async fn run_server_command( operator, role, limit, + layer, } => { // Get selected role from server if not specified let role_name = if let Some(role) = role { @@ -2181,6 +2240,10 @@ async fn run_server_command( config_res.config.selected_role }; + // Parse and validate layer + let layer = + terraphim_types::Layer::from_u8(layer).unwrap_or(terraphim_types::Layer::Three); + let q = if let Some(additional_terms) = terms { // Multi-term query with logical operators let search_terms: Vec = additional_terms @@ -2195,6 +2258,7 @@ async fn run_server_command( skip: Some(0), limit: Some(limit), role: Some(role_name), + layer, } } else { // Single term query (backward compatibility) @@ -2205,6 +2269,7 @@ async fn run_server_command( skip: Some(0), limit: Some(limit), role: Some(role_name), + layer, } }; @@ -2239,12 +2304,7 @@ async fn run_server_command( results: res .results .iter() - .map(|doc| SearchDocumentOutput { - id: doc.id.clone(), - title: doc.title.clone(), - url: doc.url.clone(), - rank: doc.rank, - }) + .map(|doc| SearchDocumentOutput::from_document(doc, &layer)) .collect(), }; print_json_output(&payload, output.mode)?; @@ -2955,6 +3015,7 @@ fn ui_loop( skip: Some(0), limit: Some(10), role: Some(RoleName::new(&role)), + layer: Layer::default(), }; let resp = api.search(&q).await?; let lines: Vec = resp diff --git a/crates/terraphim_agent/src/repl/handler.rs b/crates/terraphim_agent/src/repl/handler.rs index d8c333ce2..5dbcee483 100644 --- a/crates/terraphim_agent/src/repl/handler.rs +++ b/crates/terraphim_agent/src/repl/handler.rs @@ -409,7 +409,7 @@ impl ReplHandler { } } else if let Some(api_client) = &self.api_client { // Server mode - use current role if no role specified - use terraphim_types::{NormalizedTermValue, RoleName, SearchQuery}; + use terraphim_types::{Layer, NormalizedTermValue, RoleName, SearchQuery}; let effective_role = role.unwrap_or_else(|| self.current_role.clone()); let role_name = Some(RoleName::new(&effective_role)); @@ -420,6 +420,7 @@ impl ReplHandler { skip: Some(0), limit, role: role_name, + layer: Layer::default(), }; match api_client.search(&search_query).await { diff --git a/crates/terraphim_agent/src/service.rs b/crates/terraphim_agent/src/service.rs index 90efcb0a2..aefdc226f 100644 --- a/crates/terraphim_agent/src/service.rs +++ b/crates/terraphim_agent/src/service.rs @@ -5,7 +5,7 @@ use terraphim_persistence::Persistable; use terraphim_service::TerraphimService; use terraphim_service::llm::{ChatOptions, build_llm_from_role}; use terraphim_settings::{DeviceSettings, Error as DeviceSettingsError}; -use terraphim_types::{Document, NormalizedTermValue, RoleName, SearchQuery, Thesaurus}; +use terraphim_types::{Document, Layer, NormalizedTermValue, RoleName, SearchQuery, Thesaurus}; use tokio::sync::Mutex; #[derive(Clone)] @@ -254,6 +254,7 @@ impl TuiService { skip: Some(0), limit, role: Some(role.clone()), + layer: Layer::default(), }; let mut service = self.service.lock().await; diff --git a/crates/terraphim_agent/tests/cross_mode_consistency_test.rs b/crates/terraphim_agent/tests/cross_mode_consistency_test.rs index d5ba2f680..178c01d4c 100644 --- a/crates/terraphim_agent/tests/cross_mode_consistency_test.rs +++ b/crates/terraphim_agent/tests/cross_mode_consistency_test.rs @@ -17,7 +17,7 @@ use std::time::Duration; use anyhow::Result; use serial_test::serial; use terraphim_agent::client::ApiClient; -use terraphim_types::{NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{Layer, NormalizedTermValue, RoleName, SearchQuery}; /// Result structure normalized across all modes #[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)] @@ -244,6 +244,7 @@ async fn search_via_server( skip: Some(0), limit: Some(10), role: Some(RoleName::new(role)), + layer: Layer::default(), }; let response = client.search(&search_query).await?; diff --git a/crates/terraphim_agent/tests/error_handling_test.rs b/crates/terraphim_agent/tests/error_handling_test.rs index b7d9abb24..32d8d29e2 100644 --- a/crates/terraphim_agent/tests/error_handling_test.rs +++ b/crates/terraphim_agent/tests/error_handling_test.rs @@ -2,7 +2,7 @@ use std::time::Duration; use serial_test::serial; use terraphim_agent::client::ApiClient; -use terraphim_types::{Document, DocumentType, NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{Document, DocumentType, Layer, NormalizedTermValue, RoleName, SearchQuery}; use tokio::time::timeout; const TEST_SERVER_URL: &str = "http://localhost:8000"; @@ -92,6 +92,7 @@ async fn test_malformed_server_response() { skip: Some(0), limit: Some(100000), // Extremely large limit role: Some(RoleName::new("Default")), + layer: Layer::default(), }; let result = client.search(&extreme_query).await; @@ -137,6 +138,7 @@ async fn test_invalid_role_handling() { skip: Some(0), limit: Some(5), role: Some(RoleName::new("CompleteLyInvalidRoleName12345")), + layer: Layer::default(), }; let result = client.search(&invalid_query).await; @@ -200,6 +202,7 @@ async fn test_empty_and_special_character_queries() { skip: Some(0), limit: Some(5), role: Some(RoleName::new("Default")), + layer: Layer::default(), }; let result = client.search(&search_query).await; @@ -253,6 +256,7 @@ async fn test_concurrent_request_handling() { skip: Some(0), limit: Some(3), role: Some(RoleName::new("Default")), + layer: Layer::default(), }; client_clone.search(&query).await }); @@ -517,6 +521,7 @@ async fn test_graceful_degradation() { skip: Some(0), limit: Some(1), role: Some(RoleName::new("Default")), + layer: Layer::default(), }; client .search(&query) diff --git a/crates/terraphim_agent/tests/integration_test.rs b/crates/terraphim_agent/tests/integration_test.rs index 397401ec3..2d04da7e9 100644 --- a/crates/terraphim_agent/tests/integration_test.rs +++ b/crates/terraphim_agent/tests/integration_test.rs @@ -4,7 +4,7 @@ use std::time::Duration; use anyhow::Result; use serial_test::serial; use terraphim_agent::client::{ApiClient, ChatResponse, ConfigResponse, SearchResponse}; -use terraphim_types::{NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{Layer, NormalizedTermValue, RoleName, SearchQuery}; const TEST_SERVER_URL: &str = "http://localhost:8000"; #[allow(dead_code)] @@ -58,6 +58,7 @@ async fn test_api_client_search() { skip: Some(0), limit: Some(5), role: Some(RoleName::new("Terraphim Engineer")), + layer: Layer::default(), }; let result = client.search(&query).await; @@ -203,6 +204,7 @@ async fn test_search_with_different_roles() { skip: Some(0), limit: Some(3), role: Some(RoleName::new(role_name)), + layer: Layer::default(), }; let result = client.search(&query).await; @@ -244,6 +246,7 @@ async fn test_search_pagination() { skip: Some(0), limit: Some(2), role: Some(RoleName::new("Default")), + layer: Layer::default(), }; let result1 = client.search(&query1).await; @@ -257,6 +260,7 @@ async fn test_search_pagination() { skip: Some(2), limit: Some(2), role: Some(RoleName::new("Default")), + layer: Layer::default(), }; let result2 = client.search(&query2).await; @@ -449,6 +453,7 @@ async fn test_api_error_handling() { skip: Some(0), limit: Some(0), // Invalid limit role: Some(RoleName::new("NonExistentRole")), + layer: Layer::default(), }; let result = client.search(&query).await; diff --git a/crates/terraphim_agent/tests/kg_ranking_integration_test.rs b/crates/terraphim_agent/tests/kg_ranking_integration_test.rs index 41fa38aae..8a3efa6b0 100644 --- a/crates/terraphim_agent/tests/kg_ranking_integration_test.rs +++ b/crates/terraphim_agent/tests/kg_ranking_integration_test.rs @@ -22,7 +22,7 @@ use anyhow::Result; use serial_test::serial; use terraphim_agent::client::ApiClient; -use terraphim_types::{Document, NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{Document, Layer, NormalizedTermValue, RoleName, SearchQuery}; /// Get workspace root directory fn get_workspace_root() -> Result { @@ -276,6 +276,7 @@ async fn search_via_server( skip: Some(0), limit: Some(20), role: Some(RoleName::new(role)), + layer: Layer::default(), }; let response = client.search(&search_query).await?; diff --git a/crates/terraphim_cli/src/service.rs b/crates/terraphim_cli/src/service.rs index cd91a4ffd..02cc681e7 100644 --- a/crates/terraphim_cli/src/service.rs +++ b/crates/terraphim_cli/src/service.rs @@ -8,7 +8,7 @@ use terraphim_persistence::Persistable; use terraphim_service::TerraphimService; use terraphim_settings::{DeviceSettings, Error as DeviceSettingsError}; use terraphim_types::{ - CoverageSignal, Document, ExtractedEntity, GroundingMetadata, NormalizationMethod, + CoverageSignal, Document, ExtractedEntity, GroundingMetadata, Layer, NormalizationMethod, NormalizedTerm, NormalizedTermValue, OntologySchema, RoleName, SchemaSignal, SearchQuery, Thesaurus, }; @@ -265,6 +265,7 @@ impl CliService { skip: Some(0), limit, role: Some(role.clone()), + layer: Layer::default(), }; let mut service = self.service.lock().await; diff --git a/crates/terraphim_cli/tests/service_tests.rs b/crates/terraphim_cli/tests/service_tests.rs index 83dfce023..7c89a0450 100644 --- a/crates/terraphim_cli/tests/service_tests.rs +++ b/crates/terraphim_cli/tests/service_tests.rs @@ -205,6 +205,7 @@ mod search_query_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::new("Default")), + layer: Default::default(), }; assert_eq!(query.search_term.to_string(), "rust async"); @@ -221,6 +222,7 @@ mod search_query_tests { skip: None, limit: None, role: None, + layer: Default::default(), }; assert!(query.role.is_none()); diff --git a/crates/terraphim_mcp_server/src/lib.rs b/crates/terraphim_mcp_server/src/lib.rs index 7bfbc07bb..31632ec2c 100644 --- a/crates/terraphim_mcp_server/src/lib.rs +++ b/crates/terraphim_mcp_server/src/lib.rs @@ -15,7 +15,7 @@ use terraphim_automata::matcher::{extract_paragraphs_from_automata, find_matches use terraphim_automata::{AutocompleteConfig, AutocompleteIndex, AutocompleteResult}; use terraphim_config::{Config, ConfigState}; use terraphim_service::TerraphimService; -use terraphim_types::{NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{Layer, NormalizedTermValue, RoleName, SearchQuery}; use thiserror::Error; use tracing::{error, info}; @@ -125,6 +125,7 @@ impl McpService { role: Some(role_name), limit: limit.map(|l| l as usize), skip: skip.map(|s| s as usize), + layer: Layer::default(), }; match service.search(&search_query).await { @@ -435,6 +436,7 @@ impl McpService { role: None, limit: Some(1), skip: Some(0), + layer: Layer::default(), }; let snippet = match service.search(&sq).await { Ok(documents) if !documents.is_empty() => { @@ -1913,6 +1915,7 @@ impl ServerHandler for McpService { limit: Some(50), // Reasonable limit per search skip: None, role: None, + layer: terraphim_types::Layer::default(), }; match service.search(&search_query).await { @@ -1937,6 +1940,7 @@ impl ServerHandler for McpService { limit: Some(100), skip: None, role: None, + layer: terraphim_types::Layer::default(), }; let documents = service diff --git a/crates/terraphim_middleware/tests/atlassian_ripgrep_integration.rs b/crates/terraphim_middleware/tests/atlassian_ripgrep_integration.rs index c5298409b..6a86a4d78 100644 --- a/crates/terraphim_middleware/tests/atlassian_ripgrep_integration.rs +++ b/crates/terraphim_middleware/tests/atlassian_ripgrep_integration.rs @@ -48,6 +48,7 @@ async fn atlassian_ripgrep_haystack_smoke() { role: Some("Atlassian".into()), operator: Some(terraphim_types::LogicalOperator::And), search_terms: None, + layer: Default::default(), }; let result = search_haystacks(config_state, query).await; assert!( diff --git a/crates/terraphim_middleware/tests/clickup_haystack_test.rs b/crates/terraphim_middleware/tests/clickup_haystack_test.rs index 10ab1db1f..cb2c4fc49 100644 --- a/crates/terraphim_middleware/tests/clickup_haystack_test.rs +++ b/crates/terraphim_middleware/tests/clickup_haystack_test.rs @@ -66,6 +66,7 @@ async fn clickup_live_search_returns_documents() { role: Some("ClickUp".into()), operator: None, search_terms: None, + layer: Default::default(), }; let _results = search_haystacks(config_state, query).await.unwrap(); // Results should be empty or non-empty (both are valid for this test) @@ -117,6 +118,7 @@ async fn clickup_live_search_work_term() { role: Some("ClickUp".into()), operator: None, search_terms: None, + layer: Default::default(), }; let results = search_haystacks(config_state, query).await.unwrap(); assert!( diff --git a/crates/terraphim_middleware/tests/mcp_haystack_test.rs b/crates/terraphim_middleware/tests/mcp_haystack_test.rs index b5665ca7b..eb0caf9b5 100644 --- a/crates/terraphim_middleware/tests/mcp_haystack_test.rs +++ b/crates/terraphim_middleware/tests/mcp_haystack_test.rs @@ -59,6 +59,7 @@ async fn mcp_live_haystack_smoke() { role: Some("MCP".into()), operator: None, search_terms: None, + layer: Default::default(), }; let result = search_haystacks(config_state, query) diff --git a/crates/terraphim_middleware/tests/perplexity_haystack_test.rs b/crates/terraphim_middleware/tests/perplexity_haystack_test.rs index 239045b13..add3d2cb9 100644 --- a/crates/terraphim_middleware/tests/perplexity_haystack_test.rs +++ b/crates/terraphim_middleware/tests/perplexity_haystack_test.rs @@ -225,6 +225,7 @@ async fn test_perplexity_missing_api_key() { skip: Some(0), limit: Some(10), role: Some("Perplexity Test".into()), + layer: Default::default(), }; let result = search_haystacks(config_state, query).await; @@ -323,6 +324,7 @@ async fn perplexity_live_api_test() { skip: Some(0), limit: Some(5), role: Some("Perplexity Live Test".into()), + layer: Default::default(), }; println!("Sending query: {}", query.search_term.as_str()); diff --git a/crates/terraphim_service/src/lib.rs b/crates/terraphim_service/src/lib.rs index 35fe39a77..0a7ca7dbe 100644 --- a/crates/terraphim_service/src/lib.rs +++ b/crates/terraphim_service/src/lib.rs @@ -8,7 +8,7 @@ use terraphim_middleware::thesaurus::build_thesaurus_from_haystack; use terraphim_persistence::Persistable; use terraphim_rolegraph::{RoleGraph, RoleGraphSync}; use terraphim_types::{ - Document, Index, IndexedDocument, NormalizedTermValue, RelevanceFunction, RoleName, + Document, Index, IndexedDocument, Layer, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery, Thesaurus, }; mod score; @@ -1025,6 +1025,7 @@ impl TerraphimService { limit: Some(5), // Get a few results to check titles skip: None, role: None, + layer: Layer::default(), }; let documents = self.search(&search_query).await?; @@ -1363,6 +1364,7 @@ impl TerraphimService { role: Some(role), skip: None, limit: None, + layer: Layer::default(), }) .await?; Ok(documents) @@ -3057,6 +3059,7 @@ mod tests { limit: Some(10), skip: None, role: Some(role_name), + layer: Layer::default(), }; // Test that Atomic Data URLs are skipped during persistence lookup @@ -3170,6 +3173,7 @@ mod tests { limit: Some(10), skip: None, role: Some(role_name), + layer: Layer::default(), }; let result = service.search(&search_query).await; diff --git a/crates/terraphim_service/tests/logical_operators_fix_validation_test.rs b/crates/terraphim_service/tests/logical_operators_fix_validation_test.rs index 685f81418..9b520d682 100644 --- a/crates/terraphim_service/tests/logical_operators_fix_validation_test.rs +++ b/crates/terraphim_service/tests/logical_operators_fix_validation_test.rs @@ -2,7 +2,9 @@ mod logical_operators_fix_validation_tests { use terraphim_config::{ConfigBuilder, ConfigId, ConfigState}; use terraphim_service::TerraphimService; - use terraphim_types::{Document, LogicalOperator, NormalizedTermValue, RoleName, SearchQuery}; + use terraphim_types::{ + Document, Layer, LogicalOperator, NormalizedTermValue, RoleName, SearchQuery, + }; async fn setup_test_service() -> TerraphimService { let mut config = ConfigBuilder::new_with_id(ConfigId::Embedded) @@ -98,6 +100,7 @@ mod logical_operators_fix_validation_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; // Test get_all_terms to ensure no duplication @@ -156,6 +159,7 @@ mod logical_operators_fix_validation_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; // Test get_all_terms to ensure no duplication @@ -220,6 +224,7 @@ mod logical_operators_fix_validation_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; // Test get_all_terms for single term @@ -297,6 +302,7 @@ mod logical_operators_fix_validation_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result = service @@ -334,6 +340,7 @@ mod logical_operators_fix_validation_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; // Test get_all_terms @@ -375,6 +382,7 @@ mod logical_operators_fix_validation_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; // Test get_all_terms diff --git a/crates/terraphim_service/tests/logical_operators_integration_test.rs b/crates/terraphim_service/tests/logical_operators_integration_test.rs index be7675af4..4ca6b1140 100644 --- a/crates/terraphim_service/tests/logical_operators_integration_test.rs +++ b/crates/terraphim_service/tests/logical_operators_integration_test.rs @@ -2,7 +2,7 @@ mod logical_operators_integration_tests { use terraphim_config::{ConfigBuilder, ConfigId, ConfigState}; use terraphim_service::TerraphimService; - use terraphim_types::{LogicalOperator, NormalizedTermValue, RoleName, SearchQuery}; + use terraphim_types::{Layer, LogicalOperator, NormalizedTermValue, RoleName, SearchQuery}; async fn setup_test_service() -> TerraphimService { let mut config = ConfigBuilder::new_with_id(ConfigId::Embedded) @@ -27,6 +27,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; // Test that the search executes without error @@ -60,6 +61,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result = service.search(&query).await; @@ -87,6 +89,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result = service.search(&query).await; @@ -114,6 +117,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result = service.search(&query).await; @@ -140,6 +144,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result = service.search(&query).await; @@ -169,6 +174,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(10), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result = service.search(&query).await; @@ -197,6 +203,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(3), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result1 = service.search(&query1).await; @@ -214,6 +221,7 @@ mod logical_operators_integration_tests { skip: Some(3), limit: Some(3), role: Some(RoleName::from("Default")), + layer: Layer::default(), }; let result2 = service.search(&query2).await; @@ -248,6 +256,7 @@ mod logical_operators_integration_tests { skip: Some(0), limit: Some(5), role: Some(RoleName::from(role_name)), + layer: Layer::default(), }; let result = service.search(&query).await; diff --git a/crates/terraphim_service/tests/ollama_llama_integration_test.rs b/crates/terraphim_service/tests/ollama_llama_integration_test.rs index 27ffdd44d..900472418 100644 --- a/crates/terraphim_service/tests/ollama_llama_integration_test.rs +++ b/crates/terraphim_service/tests/ollama_llama_integration_test.rs @@ -4,7 +4,7 @@ use ahash::AHashMap; use serial_test::serial; use terraphim_config::{Config, ConfigState, Haystack, Role, ServiceType}; use terraphim_service::{TerraphimService, llm}; -use terraphim_types::{NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery}; +use terraphim_types::{Layer, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery}; /// Comprehensive integration test suite for Ollama LLM integration with llama3.2:3b /// Tests connectivity, summarization, role configuration, and end-to-end search functionality @@ -308,6 +308,7 @@ Rust has a vibrant community with excellent documentation, tutorials, and exampl limit: Some(5), skip: None, role: Some(role_name.clone()), + layer: Layer::default(), }; let results = service diff --git a/crates/terraphim_service/tests/terraphim_graph_lexical_fallback_test.rs b/crates/terraphim_service/tests/terraphim_graph_lexical_fallback_test.rs index abd56bc42..1d83cfc7a 100644 --- a/crates/terraphim_service/tests/terraphim_graph_lexical_fallback_test.rs +++ b/crates/terraphim_service/tests/terraphim_graph_lexical_fallback_test.rs @@ -91,6 +91,7 @@ async fn terraphim_graph_falls_back_to_lexical_when_graph_returns_empty() { role: Some(role_name), skip: None, limit: None, + layer: Default::default(), }; let results = service.search(&query).await.expect("search succeeds"); diff --git a/crates/terraphim_service/tests/weighted_haystack_ranking_test.rs b/crates/terraphim_service/tests/weighted_haystack_ranking_test.rs index 1ffa79525..2c1520886 100644 --- a/crates/terraphim_service/tests/weighted_haystack_ranking_test.rs +++ b/crates/terraphim_service/tests/weighted_haystack_ranking_test.rs @@ -3,7 +3,9 @@ use std::collections::HashMap; use terraphim_config::{Config, ConfigState, Haystack, Role, ServiceType}; use terraphim_persistence::Persistable; use terraphim_service::TerraphimService; -use terraphim_types::{Document, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery}; +use terraphim_types::{ + Document, Layer, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery, +}; /// Test that haystack weights correctly affect document ranking #[tokio::test] @@ -118,6 +120,7 @@ async fn test_weighted_haystack_ranking() { skip: None, limit: None, role: Some(RoleName::from("Test Role")), + layer: Layer::default(), }; // Perform search which should apply haystack weights let search_result = service.search(&search_query).await.expect("Search failed"); @@ -228,6 +231,7 @@ async fn test_default_weight_handling() { skip: None, limit: None, role: Some(RoleName::from("Test Role")), + layer: Layer::default(), }; let search_result = service.search(&search_query).await.expect("Search failed"); diff --git a/crates/terraphim_types/src/lib.rs b/crates/terraphim_types/src/lib.rs index 4d929a0ff..98382d3b2 100644 --- a/crates/terraphim_types/src/lib.rs +++ b/crates/terraphim_types/src/lib.rs @@ -788,6 +788,89 @@ pub enum LogicalOperator { Or, } +/// Layered output levels for search results. +/// +/// Controls how much content is returned per search result to optimize token usage: +/// - Layer 1: Title + tags only (~50 tokens/result) +/// - Layer 2: + first paragraph summary (~150 tokens/result) +/// - Layer 3: Full content (current default behaviour) +#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, JsonSchema)] +#[cfg_attr(feature = "typescript", derive(Tsify))] +#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))] +pub enum Layer { + /// Title + tags only (~50 tokens/result) + #[serde(rename = "1")] + #[default] + One, + /// + first paragraph summary (~150 tokens/result) + #[serde(rename = "2")] + Two, + /// Full content (default) + #[serde(rename = "3")] + Three, +} + +impl Layer { + /// Parse a layer from an integer value (1, 2, or 3) + pub fn from_u8(value: u8) -> Option { + match value { + 1 => Some(Layer::One), + 2 => Some(Layer::Two), + 3 => Some(Layer::Three), + _ => None, + } + } + + /// Returns true if this layer includes content (layer 2 or 3) + pub fn includes_content(&self) -> bool { + matches!(self, Layer::Two | Layer::Three) + } + + /// Returns true if this layer includes full content (layer 3) + pub fn includes_full_content(&self) -> bool { + matches!(self, Layer::Three) + } +} + +impl std::fmt::Display for Layer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Layer::One => write!(f, "1"), + Layer::Two => write!(f, "2"), + Layer::Three => write!(f, "3"), + } + } +} + +/// Extract the first paragraph from document body text. +/// +/// Skips YAML frontmatter (content between `---` markers) and returns +/// the first non-empty line or the first paragraph. +pub fn extract_first_paragraph(body: &str) -> String { + // Skip YAML frontmatter if present + let content = if body.trim_start().starts_with("---") { + // Find the end of frontmatter + if let Some(end_pos) = body[3..].find("---") { + &body[end_pos + 6..] // Skip past the closing --- + } else { + body + } + } else { + body + }; + + // Find first non-empty line + for line in content.lines() { + let trimmed = line.trim(); + if !trimmed.is_empty() { + return trimmed.to_string(); + } + } + + // Fallback to empty string if no content found + String::new() +} + /// A search query for finding documents in the knowledge graph. /// /// Supports both single-term and multi-term queries with logical operators (AND/OR). @@ -841,6 +924,9 @@ pub struct SearchQuery { pub limit: Option, /// Role context for this search pub role: Option, + /// Output layer for controlling result detail (1=minimal, 2=summary, 3=full) + #[serde(default)] + pub layer: Layer, } impl SearchQuery { @@ -893,6 +979,7 @@ impl SearchQuery { skip: None, limit: None, role, + layer: Layer::default(), } } } @@ -2357,6 +2444,7 @@ mod tests { skip: None, limit: Some(10), role: Some(RoleName::new("test")), + layer: Layer::default(), }; assert!(!single_query.is_multi_term_query()); @@ -2420,6 +2508,7 @@ mod tests { skip: Some(0), limit: Some(10), role: Some(RoleName::new("test_role")), + layer: Layer::default(), }; let json = serde_json::to_string(&query).unwrap(); @@ -2762,4 +2851,84 @@ mod tests { assert!(schema.entity_types[0].aliases.is_empty()); assert!(schema.entity_types[0].uri_prefix.is_none()); } + + #[test] + fn test_layer_enum() { + // Test default is Layer::One + let default: Layer = Default::default(); + assert_eq!(default, Layer::One); + + // Test from_u8 + assert_eq!(Layer::from_u8(1), Some(Layer::One)); + assert_eq!(Layer::from_u8(2), Some(Layer::Two)); + assert_eq!(Layer::from_u8(3), Some(Layer::Three)); + assert_eq!(Layer::from_u8(0), None); + assert_eq!(Layer::from_u8(4), None); + + // Test Display + assert_eq!(format!("{}", Layer::One), "1"); + assert_eq!(format!("{}", Layer::Two), "2"); + assert_eq!(format!("{}", Layer::Three), "3"); + + // Test includes_content + assert!(!Layer::One.includes_content()); + assert!(Layer::Two.includes_content()); + assert!(Layer::Three.includes_content()); + + // Test includes_full_content + assert!(!Layer::One.includes_full_content()); + assert!(!Layer::Two.includes_full_content()); + assert!(Layer::Three.includes_full_content()); + } + + #[test] + fn test_extract_first_paragraph_simple() { + let body = "First paragraph here.\n\nSecond paragraph here."; + assert_eq!(extract_first_paragraph(body), "First paragraph here."); + } + + #[test] + fn test_extract_first_paragraph_with_yaml_frontmatter() { + let body = "---\ntitle: My Document\ntags: [rust, programming]\n---\n\nThis is the actual first paragraph.\nMore content here."; + assert_eq!( + extract_first_paragraph(body), + "This is the actual first paragraph." + ); + } + + #[test] + fn test_extract_first_paragraph_empty_lines() { + let body = "\n\n\nFirst paragraph after empty lines."; + assert_eq!( + extract_first_paragraph(body), + "First paragraph after empty lines." + ); + } + + #[test] + fn test_extract_first_paragraph_single_line() { + let body = "Just one line"; + assert_eq!(extract_first_paragraph(body), "Just one line"); + } + + #[test] + fn test_layer_serialization() { + // Test that Layer serializes correctly + let query = SearchQuery { + search_term: NormalizedTermValue::new("test".to_string()), + search_terms: None, + operator: None, + skip: None, + limit: None, + role: None, + layer: Layer::Two, + }; + + let json = serde_json::to_string(&query).unwrap(); + assert!(json.contains("\"layer\"")); + + // Deserialize and check layer is preserved + let deserialized: SearchQuery = serde_json::from_str(&json).unwrap(); + assert_eq!(deserialized.layer, Layer::Two); + } } diff --git a/crates/terraphim_validation/src/testing/server_api/fixtures.rs b/crates/terraphim_validation/src/testing/server_api/fixtures.rs index c664e517b..b6c72d783 100644 --- a/crates/terraphim_validation/src/testing/server_api/fixtures.rs +++ b/crates/terraphim_validation/src/testing/server_api/fixtures.rs @@ -5,7 +5,7 @@ use ahash::AHashMap; use terraphim_config::{Config, Role}; -use terraphim_types::{ChatMessage, Document, NormalizedTermValue, RoleName, SearchQuery}; +use terraphim_types::{ChatMessage, Document, Layer, NormalizedTermValue, RoleName, SearchQuery}; /// Test fixtures for API testing pub struct TestFixtures; @@ -68,6 +68,7 @@ impl TestFixtures { role: Some(RoleName::new("TestRole")), skip: Some(0), limit: Some(10), + layer: Layer::default(), } } diff --git a/scripts/tests/test_operators_debug.rs b/scripts/tests/test_operators_debug.rs index 8f78572cd..bf54f5f36 100644 --- a/scripts/tests/test_operators_debug.rs +++ b/scripts/tests/test_operators_debug.rs @@ -1,4 +1,4 @@ -use terraphim_types::{NormalizedTermValue, SearchQuery, LogicalOperator}; +use terraphim_types::{Layer, NormalizedTermValue, SearchQuery, LogicalOperator}; fn main() { // Test case 1: Basic AND query with 2 terms @@ -12,6 +12,7 @@ fn main() { skip: None, limit: None, role: None, + layer: Layer::default(), }; let terms1 = query1.get_all_terms(); @@ -30,6 +31,7 @@ fn main() { skip: None, limit: None, role: None, + layer: Layer::default(), }; let terms2 = query2.get_all_terms(); @@ -49,6 +51,7 @@ fn main() { skip: None, limit: None, role: None, + layer: Layer::default(), }; let terms3 = query3.get_all_terms(); diff --git a/terraphim_server/tests/terraphim_graph_search_test.rs b/terraphim_server/tests/terraphim_graph_search_test.rs index 73a9862e8..4da84fc3d 100644 --- a/terraphim_server/tests/terraphim_graph_search_test.rs +++ b/terraphim_server/tests/terraphim_graph_search_test.rs @@ -6,7 +6,7 @@ use terraphim_config::{Config, Haystack, KnowledgeGraph, KnowledgeGraphLocal, Ro use terraphim_persistence::{DeviceStorage, Persistable}; use terraphim_service::TerraphimService; use terraphim_types::{ - Document, DocumentType, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery, + Document, DocumentType, Layer, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery, }; /// Comprehensive test for TerraphimGraph search issue @@ -224,6 +224,7 @@ async fn test_terraphim_graph_search_comprehensive() -> Result<(), Box Result<(), Box> role: Some(role_name.clone()), skip: None, limit: Some(10), + layer: Layer::default(), }; let results = terraphim_service.search(&search_query).await?; diff --git a/terraphim_server/tests/tui_desktop_parity_test.rs b/terraphim_server/tests/tui_desktop_parity_test.rs index 400fe1a66..6a350f6b7 100644 --- a/terraphim_server/tests/tui_desktop_parity_test.rs +++ b/terraphim_server/tests/tui_desktop_parity_test.rs @@ -13,7 +13,7 @@ use terraphim_config::{ }; use terraphim_server::axum_server; use terraphim_types::{ - KnowledgeGraphInputType, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery, + KnowledgeGraphInputType, Layer, NormalizedTermValue, RelevanceFunction, RoleName, SearchQuery, }; fn sample_config_with_kg() -> terraphim_config::Config { @@ -129,6 +129,7 @@ async fn test_tui_vs_direct_api_search_parity() { skip: Some(0), limit: Some(limit), role: Some(RoleName::new(test_role)), + layer: Layer::default(), }; let tui_result = tui_client.search(&tui_query).await; @@ -366,6 +367,7 @@ async fn test_search_consistency_across_interfaces() { skip: Some(0), limit: Some(3), role: Some(RoleName::new(test_role)), + layer: Layer::default(), }; let mut results = Vec::new(); @@ -420,6 +422,7 @@ async fn test_pagination_parity() { skip: Some(0), limit: Some(limit), role: Some(RoleName::new(role)), + layer: Layer::default(), }; let tui_page1 = tui_client.search(&page1_query).await; @@ -444,6 +447,7 @@ async fn test_pagination_parity() { skip: Some(limit), limit: Some(limit), role: Some(RoleName::new(role)), + layer: Layer::default(), }; let tui_page2 = tui_client.search(&page2_query).await; @@ -503,6 +507,7 @@ async fn test_error_handling_parity() { skip: Some(0), limit: Some(5), role: Some(RoleName::new("NonExistentRole")), + layer: Layer::default(), }; let tui_result = tui_client.search(&invalid_query).await;