Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions aw-transform/src/classify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ impl From<Regex> for Rule {
///
/// An event can only have one category, although the category may have a hierarchy,
/// for instance: "Work -> ActivityWatch -> aw-server-rust"
/// If multiple categories match, the deepest one will be chosen.
/// If multiple categories match, the first-matching rule wins, except that
/// a later rule may override when it is a strictly deeper descendant of the
/// currently selected category (honouring rule order across unrelated branches).
pub fn categorize(mut events: Vec<Event>, rules: &[(Vec<String>, Rule)]) -> Vec<Event> {
let mut classified_events = Vec::new();
for event in events.drain(..) {
Expand All @@ -105,7 +107,7 @@ pub fn categorize(mut events: Vec<Event>, rules: &[(Vec<String>, Rule)]) -> Vec<
}

fn categorize_one(mut event: Event, rules: &[(Vec<String>, Rule)]) -> Event {
let mut category: Vec<String> = vec!["Uncategorized".into()];
let mut category: Vec<String> = vec![UNCATEGORIZED.into()];
for (cat, rule) in rules {
if rule.matches(&event) {
category = _pick_highest_ranking_category(category, cat);
Expand Down Expand Up @@ -142,9 +144,15 @@ fn tag_one(mut event: Event, rules: &[(String, Rule)]) -> Event {
event
}

const UNCATEGORIZED: &str = "Uncategorized";

fn _pick_highest_ranking_category(acc: Vec<String>, item: &[String]) -> Vec<String> {
if item.len() >= acc.len() {
// If tag is category with greater or equal depth than current, then choose the new one instead.
if acc == [UNCATEGORIZED] {
item.to_vec()
} else if item.len() > acc.len() && item.starts_with(&acc) {
Comment on lines 149 to +152
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The acc == ["Uncategorized"] guard treats the sentinel "not yet classified" value and the literal category name "Uncategorized" identically. If a rule ever assigns "Uncategorized" as a real category, any subsequent matching rule will unconditionally override it. The practical risk is low today, but tying correctness to a magic string is fragile — a sentinel field (e.g. Option<Vec<String>>) would make the two states unambiguous.

Suggested change
fn _pick_highest_ranking_category(acc: Vec<String>, item: &[String]) -> Vec<String> {
if item.len() >= acc.len() {
// If tag is category with greater or equal depth than current, then choose the new one instead.
if acc == ["Uncategorized"] {
item.to_vec()
} else if item.len() > acc.len() && item.starts_with(&acc) {
const UNCATEGORIZED: &str = "Uncategorized";
fn _pick_highest_ranking_category(acc: Vec<String>, item: &[String]) -> Vec<String> {
if acc == [UNCATEGORIZED] {
item.to_vec()
} else if item.len() > acc.len() && item.starts_with(&acc) {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 07014bb by extracting UNCATEGORIZED and using it in the sentinel check.

// Rule order decides between unrelated categories. A later rule only
// overrides an earlier match when it is a more specific child of the
// already selected category.
item.to_vec()
} else {
acc
Expand Down Expand Up @@ -269,6 +277,32 @@ fn test_categorize_uncategorized() {
);
}

#[test]
fn test_categorize_keeps_earlier_unrelated_category() {
let mut e = Event::default();
e.data
.insert("test".into(), serde_json::json!("just a test"));

let mut events = vec![e];
let rules: Vec<(Vec<String>, Rule)> = vec![
(
vec!["First".into()],
Rule::from(Regex::new(r"test").unwrap()),
),
(
vec!["Second".into(), "Child".into()],
Rule::from(Regex::new(r"test").unwrap()),
),
];
events = categorize(events, &rules);

assert_eq!(events.len(), 1);
assert_eq!(
events.first().unwrap().data.get("$category").unwrap(),
&serde_json::json!(vec!["First"])
);
}

#[test]
fn test_tag() {
let mut e = Event::default();
Expand Down
Loading