From d32d26ddd624474f617d54dee5160793b9a81f49 Mon Sep 17 00:00:00 2001 From: Alex Mikhalev Date: Thu, 26 Mar 2026 13:43:30 +0100 Subject: [PATCH] feat(terraphim_types): add QualityScore metadata (K/L/S) to IndexedDocument Refs #90 Add optional quality scoring for Knowledge/Learning/Synthesis dimensions: - Add QualityScore struct with knowledge, learning, synthesis fields (0.0-1.0) - Add quality_score field to IndexedDocument with serde default - Implement composite() method to average available scores - Update all IndexedDocument constructors to handle new field - Add unit tests for QualityScore methods and backward compatibility Breaking change: IndexedDocument no longer derives Eq due to f64 in QualityScore --- .cachebro/cache.db | Bin 1413120 -> 1503232 bytes crates/terraphim_rolegraph/src/lib.rs | 4 + crates/terraphim_types/src/lib.rs | 185 +++++++++++++++++++++++++- 3 files changed, 188 insertions(+), 1 deletion(-) diff --git a/.cachebro/cache.db b/.cachebro/cache.db index fa84057172e189fb2423608e85f11a224f1c4395..6602fc6526744e77313bd19697c5e9e226847611 100644 GIT binary patch delta 11534 zcmbta3vd)=p0|g2C6iqIZ$+^9#a4@HG0n z=D2_9Z}Oe+4SL`7raYf`9`jsPT-WG7A-G)s{^-i}+lzy9{NHsgyTiRLE5wuieIv`2 zSXWE`$ERQRFJu!J%Y@}!Th%6`jg((8?H%o37VGZmO)ZbdlpA2^7mW)c_m8!6;A!Fy!pj}A*A!oUwQ<$<)_{M*6~P-D zj@Q3cAF8`Oa5CVo-BI(n{{#O9Bq-|dJGAgt?>3~#{ndrCWbL0wxBBxX+Tjnvv+BXV*#zLE3EbhLKq(xqgZp`^%oTG7Z*TJKt`=Ve31 zA)~mmK{TBxcgsnW7!#VVO=L;dBb&*z0*79A&4;OvT^^Wv$J;QumG8ZtQFBi%!5_Nz z&2o;6=JhPDlTs3DikLc4O`{sA_-VI-1cx*FxROuGNL7TbW3gonkf9r8BaKV0D)jbr zlG~KzsxA0Y8(X~^{;w`n8(*)X7z~A^^kgBcXeMs0>?tT(a)OCLk&|h@3tLxBjlz>x zheGbr-k#Mkv@g`U$ew7Ban;1JoGF71$8YfRBcbM{Yjtf@9V_JJ5!6Cn$tZheEvXPQ zomY&so=K5#EE?;IM`N6P_`_|UmL*l>lPipnQH^NIc-WZ>Lr^>S__2UkYi*58gRtE- z-;31Rzt=}#9hM{gM5C8JIWyvx0Y&z z6f_kfXM;IM&Cx+4n&P?}9DeAujAGA-4-h$x@E#aoDBq%(s}>t(el?W{GmNNeDk#(_ zDl)v^*iQ_o{~h}JPj3?aK{kRh#I}dFIC*c^e(cn7F9Jv$(!MhlEB!`uz7K?byCmf3Z_EP=M0dHJ{AsdB-fC5ge)cD)F}Hk zLjNUUBdhBOKg)V;E;VF?p~L|%kCp%poj 
zx}jS1OQ0+Y8PhVFGFGAO1F@wEEn+j3 zh8?JoH&f5Ui67UA3-S{Qa}-5i=F%zHJ1HJYmMfKDUBTQDx7cb@93TQ#Qp>we)nv92 zb@Oog7AbT!rICY$fsO)TOffmSqOy^|-NKRMueqBxB8p0Kq-gE1HlqZVn~pOr64mnw z3r^_j5ou_nqojj@>Dbdk%NSol)AQK@$`X=VzUE#zQ&0@sl=to@dvWgGNYp4~2L@JA z5#Tpt4_aawWsZjFs$_7G;Lfo5h|pw%230IYnF*E5mm9VPB5qLSo*FLxNDM(|I8;+_ zVZu_uKwi(tgen@+|87kz@n=Cpk& z<+)G>@+Kj)lKDDD>{r*=p$#G#q0E-+%V9qHwX{WXgc`F~rTT*RB%uk0zah53(IKI6 z#oFoS=hzpWO!1r2F~HA&4=jDSt^?kDG2m&io#mbc5*Y#EuiYZNd{*=?O3@dHx)K*Q z_%VUc{i zr_(I~mPc$$3T1OwTS1^K;rVC3M3^6m<_bnSTo!#w^WfM{_k1`dx@tVk>A)Y?p)4=- z3H|kS5BkI34>fs1+jxFFp2mP_ye<76od;2|igIsQ1v7$07cFuoX__-BJkosn`N(0Qst5 z>so8+wr~ahD2<&JZ`v`Yw^zKOV&6~NWev^?-q0leiteZSkbL#2BL%bSculYVenKx) z9gk&AyHt&~Bpmv^Tco6=mFlF8%SP2DF+ZsOzEFJ~{SvHTPS+l*Mp|0lTYZNSMFHW# z`p{;i+Dxm}$urY(brI3Rr7GcA;jv{kswXVTst!^$Q?L2~ttnZ3U4(^NT_ocwss&DM z@HWAJZVvh2bX#bN--;0iytF2C<)n@g3WImX&ib85z-rfUUP)pNYvv1BfU-JgkYspSvPLM(O5ImgbX^C2J3v*Y}oUC_m!~ejN2P6MNpOi zNA_cuIqhBaE?w2a?W7&8PTpA4?xE%G5WM#tkrW^Qw)DiTNopSS~_!V&V>q%Df8O*M3>P z*|?cJL;Qb5vH1BRadg(5RJE6(V_<+5^|9zOEHIFmb;_uWiE^o8z}#(Fg-fWEGfYi} zn&Is;b#sb)?-y_O!qg7IJ!$V(j`4wtLIQJA>;=(0x4J<@Djj;|rl?$&8cuubR7&vB zPlXzo|Fm~D#5!EQ;@c02{jN#m&gwTDKrzi)G%rus5WQ8M_&gC=(a74L3+|^TR?-alyml)53-sYWzY0agNvq z(*y+{z{nKI<9x+WWzwQ(q0=HX=QgUZd#^Bh@Cep zG)I}ZD6BVe3{!h#MosN`x*fAlo~lXAtP`9{4E>1Lf)a(F9TnS;f97j}skIHkC{1Su z2rIoDl~@kJnmIs5bUnl60vC^pb0PXf$mivr0)}1_+jp=+)R|7Hq+EBSX>vn%nvPX^ zjMy?sA*UGTv@vH1WZqi;?E7L7vNLqNSY1jxF%=R!dk-b&>=t{V@)7sHq5F3x0F4HqX3xTWnzn`=YcNRc?Q9=7QM#=&27e5nqGZ_~RQTeL1kjBV5u;xE5D zrH^YQrD7yD)xC;Q!s1#?x`;fAO(|>#$tjvlfAbr};+*Bx%qmrvHU{4zp?P!YD916e z%^>%ep#|4j`=bM@8mCJ($vNXaIv#{5FZcsz?h8urlw2oHUfO{;B}W+)*yt$t9yS?e zpJyTpVaDc^JW;vQG`*1rN*u5_m~)Gg)e+|MqC4D-Vi--swgzWRE^*-K5%Ux3y4>R$2>pLh#s93u4DL37TLh z?47I{*jiy4+3?$wVlT}9p}1usP50PloV51NimhR4z9hlXABhBVe=lBL{OE^bgZqmi z(as<-0&VBTb@Mi}UR(LGg#!oB&Wm$}b~yhZViPougc{+)NrcYu&FDEUoEPU6H@qM| z?<@Y{j2IQ*Y%- z2|u(oNDDxnEwvS|Ym}~M2cPLMoK8zE#S_iaupf3urEeB*A<_!At_r7jdqeQ)IWbuL 
z!4he^A2uzLu7a2(d5T>zX$Kp07knO(-h+3tHLb;iE2M+IqI-?C?QS^RCfy4co^Plr zHryzE%7IaUh;ZcF+6VNsnU9OREGU}ObivnG~nzUXQt9PM-ItU~60;u#JAN`hbC5G*SQeY<>=%hbpCO7p1w7|1WJxYPfoHgQQh|<1 z*YxZ}ds$RN2yRL11w16=*dh#W1U(e%QDJwCSO*;=3jMcXen~%BXVi;pDbQ%%YJ8US2T_ijz{7A zgqI6A#02ciAE&IB4~(e$T* zJf$c(8}fMFd)y5dSJbw^GfF6MX;-jihS)3DFs7k?J3Z5<7rYCmv0*H}R@YqY$>D>*)(zNoDUf!_E8Albi zIb2Tr)g1b@(_L48e?NEWb{^yu5iH@u($%>xLL1kYF&|->AX!N}HiPjLL0f`tsuUJP z7#6V^re~C~yqrrT)AW+XxI*rt!D1Z!9Q{rsr^6na73*?UNCGQgysnbmjpYmzWBKT< z*j>faxSz=M*q+B>dPxG4;>svJqF}dH)$xE5YgP6c8>L7Xe0Ec9OWO<>PL~P~yUhMi zoeu`#p|^!V4TdBxMi|n(*DSCVZ_mX;JG)rons4fMWKogjab;5BO&2?&xnY~s58qlK z)xp2q=D!M#{%u_y5Jl>Qh5Il=dVEUo^D7YOPuV(-bZn^DnrEJ~-~P)9?6KJ8LOXMQ zEV?rUOgh?(&a4IP8rQ z`!kr$@jk1iOgOa{bKEPpp%yNy9$VyGJ^cJiw%ZJ}4|I8rbxhY-x2-Wl&z4GmcECmR zuCJt`Wo#{5QMA*EZ)mHDbABn!SzgI*`;acYd4%a7Yi+gIsxwSHJfMNmk{b*OHR3F^ zw37aMrt2?>_ZtwmN@fDh882pGb0fv`UUK)uK2#yLz_zU69| z2rxl*bb&}-N0rUn*DNDL<>T!Y1>(2Xi16S6=@;->MhZ09M?119GvITKz@dE&P4LLQ zQq8=}>9KC;9PcI(_KE5LGG2Vy$FwlJeQ`g*3gZE({d|h$5r`^-N U_D{U;TAcbb>490aVX))>0k$cv9RL6T delta 273 zcmZp85Z$mKY=ShaBm)Cu@?ZhGkm+P5JxZ5~j|9N`k0zE**QfBFF}L5J-pP6*h9 zOK@?#W#ISVo5p*MSC}V)dlfeeR}|+Qj<-O47dhJBY6t?c5D*IkvB>tf8luAK04o1b AnE(I) diff --git a/crates/terraphim_rolegraph/src/lib.rs b/crates/terraphim_rolegraph/src/lib.rs index d2461dfeb..31bff524e 100644 --- a/crates/terraphim_rolegraph/src/lib.rs +++ b/crates/terraphim_rolegraph/src/lib.rs @@ -633,6 +633,7 @@ impl RoleGraph { rank: total_rank, tags: vec![normalized_term.to_string()], nodes: vec![node_id], + quality_score: None, }); } Entry::Occupied(mut e) => { @@ -732,6 +733,7 @@ impl RoleGraph { rank: total_rank, tags: vec![normalized_term.to_string()], nodes: vec![node_id], + quality_score: None, }); } Entry::Occupied(mut e) => { @@ -835,6 +837,7 @@ impl RoleGraph { rank: total_rank, tags: vec![normalized_term.to_string()], nodes: vec![node_id], + quality_score: None, }); } Entry::Occupied(mut e) => { @@ -938,6 +941,7 @@ impl RoleGraph { rank: total_rank, tags: 
vec![normalized_term.to_string()], nodes: vec![node_id], + quality_score: None, }, vec![term.to_string()], )); diff --git a/crates/terraphim_types/src/lib.rs b/crates/terraphim_types/src/lib.rs index 98382d3b2..0cadd7801 100644 --- a/crates/terraphim_types/src/lib.rs +++ b/crates/terraphim_types/src/lib.rs @@ -744,8 +744,67 @@ impl IntoIterator for Index { } } +/// Quality scores for Knowledge/Learning/Synthesis (K/L/S) dimensions. +/// +/// These scores represent the quality of a document across three dimensions: +/// - Knowledge: Depth and accuracy of domain knowledge +/// - Learning: Educational value and clarity +/// - Synthesis: Integration of concepts and insight +/// +/// All scores are optional and range from 0.0 to 1.0 when present. +#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)] +pub struct QualityScore { + /// Knowledge quality score (0.0-1.0) + pub knowledge: Option<f64>, + /// Learning quality score (0.0-1.0) + pub learning: Option<f64>, + /// Synthesis quality score (0.0-1.0) + pub synthesis: Option<f64>, +} + +impl QualityScore { + /// Calculate the composite score by averaging all available scores. + /// + /// Returns 0.0 if no scores are available. 
+ /// + /// # Examples + /// + /// ``` + /// use terraphim_types::QualityScore; + /// + /// let score = QualityScore { + /// knowledge: Some(0.8), + /// learning: Some(0.6), + /// synthesis: None, + /// }; + /// assert_eq!(score.composite(), 0.7); // (0.8 + 0.6) / 2 + /// + /// let empty = QualityScore::default(); + /// assert_eq!(empty.composite(), 0.0); + /// ``` + pub fn composite(&self) -> f64 { + let mut sum = 0.0; + let mut count = 0; + + if let Some(k) = self.knowledge { + sum += k; + count += 1; + } + if let Some(l) = self.learning { + sum += l; + count += 1; + } + if let Some(s) = self.synthesis { + sum += s; + count += 1; + } + + if count == 0 { 0.0 } else { sum / count as f64 } + } +} + /// Reference to external storage of documents -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct IndexedDocument { /// UUID of the indexed document, matching external storage id pub id: String, @@ -758,6 +817,9 @@ pub struct IndexedDocument { pub tags: Vec, /// List of node IDs for validation of matching pub nodes: Vec, + /// Quality scores for K/L/S dimensions + #[serde(default)] + pub quality_score: Option, } impl IndexedDocument { @@ -771,6 +833,7 @@ impl IndexedDocument { rank: 0, tags: document.tags.unwrap_or_default(), nodes: Vec::new(), + quality_score: None, } } } @@ -2931,4 +2994,124 @@ mod tests { let deserialized: SearchQuery = serde_json::from_str(&json).unwrap(); assert_eq!(deserialized.layer, Layer::Two); } + + #[test] + fn test_quality_score_composite() { + // Test with all three scores + let full_score = QualityScore { + knowledge: Some(0.8), + learning: Some(0.6), + synthesis: Some(0.7), + }; + assert!((full_score.composite() - 0.7).abs() < f64::EPSILON); // (0.8 + 0.6 + 0.7) / 3 + + // Test with two scores + let partial_score = QualityScore { + knowledge: Some(0.9), + learning: None, + synthesis: Some(0.5), + }; + assert!((partial_score.composite() - 0.7).abs() < 
f64::EPSILON); // (0.9 + 0.5) / 2 + + // Test with one score + let single_score = QualityScore { + knowledge: Some(0.8), + learning: None, + synthesis: None, + }; + assert!((single_score.composite() - 0.8).abs() < f64::EPSILON); + + // Test with no scores (default) + let empty_score = QualityScore::default(); + assert_eq!(empty_score.composite(), 0.0); + } + + #[test] + fn test_quality_score_serialization() { + let score = QualityScore { + knowledge: Some(0.8), + learning: Some(0.6), + synthesis: Some(0.7), + }; + + let json = serde_json::to_string(&score).unwrap(); + assert!(json.contains("0.8")); + assert!(json.contains("0.6")); + assert!(json.contains("0.7")); + + let deserialized: QualityScore = serde_json::from_str(&json).unwrap(); + assert_eq!(deserialized.knowledge, Some(0.8)); + assert_eq!(deserialized.learning, Some(0.6)); + assert_eq!(deserialized.synthesis, Some(0.7)); + } + + #[test] + fn test_quality_score_default_serialization() { + // Test that default QualityScore serializes/deserializes correctly + let score = QualityScore::default(); + let json = serde_json::to_string(&score).unwrap(); + let deserialized: QualityScore = serde_json::from_str(&json).unwrap(); + assert!(deserialized.knowledge.is_none()); + assert!(deserialized.learning.is_none()); + assert!(deserialized.synthesis.is_none()); + } + + #[test] + fn test_indexed_document_with_quality_score() { + let doc = IndexedDocument { + id: "test-doc-1".to_string(), + matched_edges: vec![], + rank: 10, + tags: vec!["rust".to_string()], + nodes: vec![1, 2], + quality_score: Some(QualityScore { + knowledge: Some(0.8), + learning: Some(0.6), + synthesis: Some(0.7), + }), + }; + + assert_eq!(doc.id, "test-doc-1"); + assert!((doc.quality_score.as_ref().unwrap().composite() - 0.7).abs() < f64::EPSILON); + } + + #[test] + fn test_indexed_document_from_document_quality_score_none() { + let doc = Document { + id: "doc-1".to_string(), + url: "https://example.com".to_string(), + title: "Test".to_string(), + 
body: "Body".to_string(), + description: None, + summarization: None, + stub: None, + tags: None, + rank: None, + source_haystack: None, + doc_type: DocumentType::Document, + synonyms: None, + route: None, + priority: None, + }; + + let indexed = IndexedDocument::from_document(doc); + assert!(indexed.quality_score.is_none()); + } + + #[test] + fn test_indexed_document_serialization_backward_compat() { + // Test that IndexedDocument without quality_score deserializes correctly + // This simulates old data that doesn't have the quality_score field + let json = r#"{ + "id": "doc-1", + "matched_edges": [], + "rank": 5, + "tags": ["test"], + "nodes": [1] + }"#; + + let doc: IndexedDocument = serde_json::from_str(json).unwrap(); + assert_eq!(doc.id, "doc-1"); + assert!(doc.quality_score.is_none()); + } }