From ce4bdc7d08a4b24c929161470444efda25790a10 Mon Sep 17 00:00:00 2001 From: Bob Date: Sun, 24 May 2026 00:05:09 +0000 Subject: [PATCH 1/2] fix(classify): honor rule order across branches --- aw-transform/src/classify.rs | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/aw-transform/src/classify.rs b/aw-transform/src/classify.rs index b3fe9758..8a9f7924 100644 --- a/aw-transform/src/classify.rs +++ b/aw-transform/src/classify.rs @@ -143,8 +143,12 @@ fn tag_one(mut event: Event, rules: &[(String, Rule)]) -> Event { } fn _pick_highest_ranking_category(acc: Vec, item: &[String]) -> Vec { - if item.len() >= acc.len() { - // If tag is category with greater or equal depth than current, then choose the new one instead. + if acc == ["Uncategorized"] { + item.to_vec() + } else if item.len() > acc.len() && item.starts_with(&acc) { + // Rule order decides between unrelated categories. A later rule only + // overrides an earlier match when it is a more specific child of the + // already selected category. 
item.to_vec() } else { acc @@ -269,6 +273,32 @@ fn test_categorize_uncategorized() { ); } +#[test] +fn test_categorize_keeps_earlier_unrelated_category() { + let mut e = Event::default(); + e.data + .insert("test".into(), serde_json::json!("just a test")); + + let mut events = vec![e]; + let rules: Vec<(Vec, Rule)> = vec![ + ( + vec!["First".into()], + Rule::from(Regex::new(r"test").unwrap()), + ), + ( + vec!["Second".into(), "Child".into()], + Rule::from(Regex::new(r"test").unwrap()), + ), + ]; + events = categorize(events, &rules); + + assert_eq!(events.len(), 1); + assert_eq!( + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!(vec!["First"]) + ); +} + #[test] fn test_tag() { let mut e = Event::default(); From 07014bbc9927506207892dbad1ad14c7890f6ffa Mon Sep 17 00:00:00 2001 From: Bob Date: Sun, 24 May 2026 00:31:02 +0000 Subject: [PATCH 2/2] docs(classify): update doc comment to reflect rule-order semantics; use UNCATEGORIZED constant - Update the comment on categorize() to describe the new first-match-wins behaviour rather than the old greedy-depth selection. - Extract "Uncategorized" into a named const so the sentinel string is defined in one place (per Greptile review). Note that a rule whose category is literally ["Uncategorized"] still compares equal to the sentinel and can be overridden by any later matching rule. --- aw-transform/src/classify.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/aw-transform/src/classify.rs b/aw-transform/src/classify.rs index 8a9f7924..af426132 100644 --- a/aw-transform/src/classify.rs +++ b/aw-transform/src/classify.rs @@ -95,7 +95,9 @@ impl From for Rule { /// /// An event can only have one category, although the category may have a hierarchy, /// for instance: "Work -> ActivityWatch -> aw-server-rust" -/// If multiple categories match, the deepest one will be chosen. 
+/// If multiple categories match, the first-matching rule wins, except that +/// a later rule may override when it is a strictly deeper descendant of the +/// currently selected category (honouring rule order across unrelated branches). pub fn categorize(mut events: Vec, rules: &[(Vec, Rule)]) -> Vec { let mut classified_events = Vec::new(); for event in events.drain(..) { @@ -105,7 +107,7 @@ pub fn categorize(mut events: Vec, rules: &[(Vec, Rule)]) -> Vec< } fn categorize_one(mut event: Event, rules: &[(Vec, Rule)]) -> Event { - let mut category: Vec = vec!["Uncategorized".into()]; + let mut category: Vec = vec![UNCATEGORIZED.into()]; for (cat, rule) in rules { if rule.matches(&event) { category = _pick_highest_ranking_category(category, cat); @@ -142,8 +144,10 @@ fn tag_one(mut event: Event, rules: &[(String, Rule)]) -> Event { event } +const UNCATEGORIZED: &str = "Uncategorized"; + fn _pick_highest_ranking_category(acc: Vec, item: &[String]) -> Vec { - if acc == ["Uncategorized"] { + if acc == [UNCATEGORIZED] { item.to_vec() } else if item.len() > acc.len() && item.starts_with(&acc) { // Rule order decides between unrelated categories. A later rule only