From be9b5397f6b4a4d405095903ebab0c7adb18a5e6 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Wed, 8 Apr 2026 17:34:19 +0200 Subject: [PATCH 1/2] ROX-33216: implement inode tracking for path_rename This is a first attempt at adding inode tracking to the path_rename LSM hook. A number of changes were needed in order to cover all the possible edge cases in this operation. The most important change is that inodes are now always sent to userspace alongside with a value that indicates how the event is triggered (by matching an inode or a path), this was needed because the path_rename operation will use an inode number of 0 the same we were doing when the new path is not populated (i.e there is no file on the path we are landing). As a side effect of this, inodes on the kernel side cannot be NULL, some additional cleanup might be needed. Properly tracking rename operations requires collaboration between kernelspace and userspace, particularly when renaming a directory. In this case, the LSM hook will trigger once and we have no way of iterating through subdirectories and files contained in the directories involved, so the kernel does its best effort with the available information and leaves the heavy lifting to userspace. Where possible, userspace will use the available information in memory to adjust the paths being tracked, any discrepancies will be sorted by (hopefully) subsequent events or a periodic scan of the host. The logic on both sides is documented as thoroughly as possible. --- fact-ebpf/src/bpf/events.h | 10 +- fact-ebpf/src/bpf/file.h | 13 ++- fact-ebpf/src/bpf/inode.h | 17 +-- fact-ebpf/src/bpf/main.c | 98 +++++++++++++---- fact-ebpf/src/bpf/types.h | 14 ++- fact-ebpf/src/lib.rs | 21 ++++ fact/src/bpf/mod.rs | 13 ++- fact/src/event/mod.rs | 64 +++++++++-- fact/src/host_scanner.rs | 179 +++++++++++++++++++++++++++++-- fact/src/metrics/host_scanner.rs | 7 +- fact/src/metrics/mod.rs | 13 +++ tests/test_path_rename.py | 72 ++++++------- 12 files changed, 421 insertions(+), 100 deletions(-) diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index 276a1d97..a3d30ea9 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -18,6 +18,7 @@ struct submit_event_args_t { const char* filename; inode_key_t inode; inode_key_t parent_inode; + monitored_t monitored; }; __always_inline static bool reserve_event(struct submit_event_args_t* args) { @@ -33,6 +34,7 @@ __always_inline static void __submit_event(struct submit_event_args_t* args, bool use_bpf_d_path) { struct event_t* event = args->event; event->timestamp = bpf_ktime_get_boot_ns(); + event->monitored = args->monitored; inode_copy(&event->inode, &args->inode); inode_copy(&event->parent_inode, &args->parent_inode); bpf_probe_read_str(event->filename, PATH_MAX, args->filename); @@ -110,14 +112,16 @@ __always_inline static void submit_ownership_event(struct submit_event_args_t* a __always_inline static void submit_rename_event(struct submit_event_args_t* args, const char old_filename[PATH_MAX], - inode_key_t* old_inode) { + inode_key_t* old_inode, + monitored_t old_monitored) { if (!reserve_event(args)) { return; } args->event->type = FILE_ACTIVITY_RENAME; - bpf_probe_read_str(args->event->rename.old_filename, PATH_MAX, old_filename); - inode_copy(&args->event->rename.old_inode, old_inode); + bpf_probe_read_str(args->event->rename.filename, PATH_MAX, old_filename); + inode_copy(&args->event->rename.inode, old_inode); + args->event->rename.monitored = old_monitored; __submit_event(args, path_hooks_support_bpf_d_path); } diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index efdccfc5..942ecbdf 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -27,18 +27,17 @@ __always_inline static bool path_is_monitored(struct bound_path_t* path) { return res; } -__always_inline static inode_monitored_t is_monitored(inode_key_t* inode, struct bound_path_t* path, const inode_key_t* parent) { +__always_inline static monitored_t is_monitored(const inode_key_t* inode, struct bound_path_t* path, const inode_key_t* parent) { const inode_value_t* volatile inode_value = inode_get(inode); const inode_value_t* volatile parent_value = inode_get(parent); - inode_monitored_t status = inode_is_monitored(inode_value, parent_value); + monitored_t status = inode_is_monitored(inode_value, parent_value); if (status != NOT_MONITORED) { return status; } - inode_reset(inode); if (path_is_monitored(path)) { - return MONITORED; + return MONITORED_BY_PATH; } return NOT_MONITORED; @@ -46,15 +45,15 @@ __always_inline static inode_monitored_t is_monitored(inode_key_t* inode, struct // Check if a new directory should be tracked based on its parent and path. // This is used during mkdir operations where the child inode doesn't exist yet. -__always_inline static inode_monitored_t should_track_mkdir(inode_key_t parent_inode, struct bound_path_t* child_path) { +__always_inline static monitored_t should_track_mkdir(inode_key_t parent_inode, struct bound_path_t* child_path) { const inode_value_t* volatile parent_value = inode_get(&parent_inode); if (parent_value != NULL) { - return PARENT_MONITORED; + return MONITORED_BY_PARENT; } if (path_is_monitored(child_path)) { - return MONITORED; + return MONITORED_BY_PATH; } return NOT_MONITORED; diff --git a/fact-ebpf/src/bpf/inode.h b/fact-ebpf/src/bpf/inode.h index 6b213c3b..01f01814 100644 --- a/fact-ebpf/src/bpf/inode.h +++ b/fact-ebpf/src/bpf/inode.h @@ -85,20 +85,21 @@ __always_inline static void inode_reset(struct inode_key_t* inode) { inode->dev = 0; } -typedef enum inode_monitored_t { - NOT_MONITORED = 0, - MONITORED, - PARENT_MONITORED, -} inode_monitored_t; +/* + * Check if the supplied inode is empty + */ +__always_inline static bool inode_is_empty(struct inode_key_t* inode) { + return inode->inode == 0 && inode->dev == 0; +} // Check if the provided inode or its parent is being monitored. -__always_inline static inode_monitored_t inode_is_monitored(const inode_value_t* volatile inode, const inode_value_t* volatile parent_inode) { +__always_inline static monitored_t inode_is_monitored(const inode_value_t* volatile inode, const inode_value_t* volatile parent_inode) { if (inode != NULL) { - return MONITORED; + return MONITORED_BY_INODE; } if (parent_inode != NULL) { - return PARENT_MONITORED; + return MONITORED_BY_PARENT; } return NOT_MONITORED; diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index c6effe8d..258aba0c 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -52,14 +52,13 @@ int BPF_PROG(trace_file_open, struct file* file) { struct inode* parent_inode_ptr = parent_dentry ? BPF_CORE_READ(parent_dentry, d_inode) : NULL; args.parent_inode = inode_to_key(parent_inode_ptr); - inode_monitored_t status = is_monitored(&args.inode, path, &args.parent_inode); - - if (status == PARENT_MONITORED && event_type == FILE_ACTIVITY_CREATION) { - inode_add(&args.inode); + args.monitored = is_monitored(&args.inode, path, &args.parent_inode); + if (args.monitored == NOT_MONITORED) { + goto ignored; } - if (status == NOT_MONITORED) { - goto ignored; + if (args.monitored == MONITORED_BY_PARENT && event_type == FILE_ACTIVITY_CREATION) { + inode_add(&args.inode); } submit_open_event(&args, event_type); @@ -90,8 +89,9 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { args.filename = path->path; args.inode = inode_to_key(dentry->d_inode); + args.monitored = is_monitored(&args.inode, path, NULL); - if (is_monitored(&args.inode, path, NULL) == NOT_MONITORED) { + if (args.monitored == NOT_MONITORED) { m->path_unlink.ignored++; return 0; } @@ -122,8 +122,9 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { args.filename = bound_path->path; args.inode = inode_to_key(path->dentry->d_inode); + args.monitored = is_monitored(&args.inode, bound_path, NULL); - if (is_monitored(&args.inode, bound_path, NULL) == NOT_MONITORED) { + if (args.monitored == NOT_MONITORED) { args.metrics->ignored++; return 0; } @@ -156,8 +157,9 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign args.filename = bound_path->path; args.inode = inode_to_key(path->dentry->d_inode); + args.monitored = is_monitored(&args.inode, bound_path, NULL); - if (is_monitored(&args.inode, bound_path, NULL) == NOT_MONITORED) { + if (args.monitored == NOT_MONITORED) { args.metrics->ignored++; return 0; } @@ -197,18 +199,75 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, } args.inode = inode_to_key(new_dentry->d_inode); + args.parent_inode = inode_to_key(new_dir->dentry->d_inode); + args.monitored = is_monitored(&args.inode, new_path, &args.parent_inode); inode_key_t old_inode = inode_to_key(old_dentry->d_inode); - - inode_monitored_t old_monitored = is_monitored(&old_inode, old_path, NULL); - inode_monitored_t new_monitored = is_monitored(&args.inode, new_path, NULL); - - if (old_monitored == NOT_MONITORED && new_monitored == NOT_MONITORED) { - args.metrics->ignored++; - return 0; + monitored_t old_monitored = is_monitored(&old_inode, old_path, NULL); + + // From this point on we need to handle inode tracking. + // + // The result will be a combination of whether we are already tracking + // the old inode or not and whether the target path has an existing + // object that is about to be overwritten and if said object is + // tracked by inode or not. + switch (args.monitored) { + case NOT_MONITORED: + if (old_monitored == NOT_MONITORED) { + m->path_rename.ignored++; + return 0; + } + + if (old_monitored == MONITORED_BY_INODE) { + // Old inode is monitored, new path is not. + // If the old path is a directory userspace will remove any + // subdirectories and files too. + inode_remove(&old_inode); + } + break; + + case MONITORED_BY_PATH: + if (old_monitored == MONITORED_BY_INODE) { + // New path is not inode tracked, old path is. + // + // This implies the inode will be crossing a FS mountpoint, + // which should never happen. When the inode crosses into a new + // mount, a new inode is created altogether. Still, we can cover + // our bases. + inode_remove(&old_inode); + } + break; + + case MONITORED_BY_PARENT: + if (old_monitored != MONITORED_BY_INODE) { + // Old inode is not monitored, new parent is. + if (inode_is_empty(&args.inode)) { + // Landing on an empty path, we track the inode in case we + // need to, userspace will double check in detail. + inode_add(&old_inode); + } + } else if (!inode_is_empty(&args.inode)) { + // Old inode is monitored and will land on a path that has a + // monitored parent but the path itself is not monitored, we + // stop tracking the inode + inode_remove(&old_inode); + } + break; + + case MONITORED_BY_INODE: + // If we landed here, the new path already has an inode that is + // being tracked and is about to be overwritten, we need to remove + // it from the map + inode_remove(&args.inode); + if (old_monitored != MONITORED_BY_INODE) { + // Old inode is not monitored, but is landing in a monitored + // path that uses inode tracking. + inode_add(&old_inode); + } + break; } - submit_rename_event(&args, old_path->path, &old_inode); + submit_rename_event(&args, old_path->path, &old_inode, old_monitored); return 0; error: @@ -235,7 +294,8 @@ int BPF_PROG(trace_path_mkdir, struct path* dir, struct dentry* dentry, umode_t struct inode* parent_inode_ptr = BPF_CORE_READ(dir, dentry, d_inode); inode_key_t parent_inode = inode_to_key(parent_inode_ptr); - if (should_track_mkdir(parent_inode, path) != PARENT_MONITORED) { + monitored_t monitored = should_track_mkdir(parent_inode, path); + if (monitored != MONITORED_BY_PARENT) { m->path_mkdir.ignored++; return 0; } @@ -266,6 +326,7 @@ int BPF_PROG(trace_path_mkdir, struct path* dir, struct dentry* dentry, umode_t return 0; } mkdir_ctx->parent_inode = parent_inode; + mkdir_ctx->monitored = monitored; return 0; } @@ -295,6 +356,7 @@ int BPF_PROG(trace_d_instantiate, struct dentry* dentry, struct inode* inode) { } args.filename = mkdir_ctx->path; args.parent_inode = mkdir_ctx->parent_inode; + args.monitored = mkdir_ctx->monitored; args.inode = inode_to_key(inode); diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index 95c67e86..67d8df49 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -42,6 +42,13 @@ typedef struct inode_key_t { unsigned long dev; } inode_key_t; +typedef enum monitored_t { + NOT_MONITORED = 0, + MONITORED_BY_INODE, + MONITORED_BY_PATH, + MONITORED_BY_PARENT, +} monitored_t; + // We can't use bool here because it is not a standard C type, we would // need to include vmlinux.h but that would explode our Rust bindings. // For the time being we just keep a char. @@ -64,6 +71,7 @@ struct event_t { char filename[PATH_MAX]; inode_key_t inode; inode_key_t parent_inode; + monitored_t monitored; file_activity_type_t type; union { struct { @@ -77,8 +85,9 @@ struct event_t { } old, new; } chown; struct { - char old_filename[PATH_MAX]; - inode_key_t old_inode; + char filename[PATH_MAX]; + inode_key_t inode; + monitored_t monitored; } rename; }; }; @@ -101,6 +110,7 @@ struct path_prefix_t { struct mkdir_context_t { char path[PATH_MAX]; inode_key_t parent_inode; + monitored_t monitored; }; // Metrics types diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index c4c95ec6..5d5984ab 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -103,6 +103,27 @@ impl Serialize for inode_key_t { unsafe impl Pod for inode_key_t {} +impl Default for monitored_t { + fn default() -> Self { + monitored_t::NOT_MONITORED + } +} + +impl Serialize for monitored_t { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match *self { + monitored_t::NOT_MONITORED => "not monitored".serialize(serializer), + monitored_t::MONITORED_BY_INODE => "by inode".serialize(serializer), + monitored_t::MONITORED_BY_PATH => "by path".serialize(serializer), + monitored_t::MONITORED_BY_PARENT => "by parent".serialize(serializer), + _ => unreachable!("Invalid monitored_t value: {self:?}"), + } + } +} + impl metrics_by_hook_t { fn accumulate(&self, other: &metrics_by_hook_t) -> metrics_by_hook_t { let mut m = metrics_by_hook_t { ..*self }; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 38fe269d..94fd6a08 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -9,7 +9,7 @@ use aya::{ use checks::Checks; use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; -use log::{error, info}; +use log::{error, info, warn}; use tokio::{ io::unix::AsyncFd, sync::{mpsc, watch}, @@ -160,7 +160,9 @@ impl Bpf { // Remove old prefixes for p in self.paths.iter().filter(|p| !new_paths.contains(p)) { - path_prefix.remove(&(*p).into())?; + if let Err(e) = path_prefix.remove(&(*p).into()) { + warn!("Failed to remove path prefix: {e:#?}"); + } } self.paths = new_paths; @@ -228,7 +230,12 @@ impl Bpf { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { Ok(event) => { - if event.is_ignored(&self.paths_globset) { + // If the event is monitored by parent, we need to check + // its host path, but we don't have that context here, + // so we let the event go into HostScanner and make the + // decision there. + if !event.is_monitored_by_parent() && + event.is_ignored(&self.paths_globset) { event_counter.dropped(); continue; } diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 1854d7af..a5e96c47 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -10,7 +10,7 @@ use globset::GlobSet; use log::warn; use serde::Serialize; -use fact_ebpf::{PATH_MAX, event_t, file_activity_type_t, inode_key_t}; +use fact_ebpf::{PATH_MAX, event_t, file_activity_type_t, inode_key_t, monitored_t}; use crate::host_info; use process::Process; @@ -95,6 +95,7 @@ impl Event { host_file, inode: Default::default(), parent_inode: Default::default(), + monitored: Default::default(), }; let file = match data { EventTestData::Creation => FileData::Creation(inner), @@ -143,6 +144,10 @@ impl Event { matches!(self.file, FileData::Unlink(_)) } + pub fn is_rename(&self) -> bool { + matches!(self.file, FileData::Rename(_)) + } + /// Unwrap the inner FileData and return the inode that triggered /// the event. /// @@ -195,7 +200,7 @@ impl Event { } } - fn get_old_filename(&self) -> Option<&PathBuf> { + pub fn get_old_filename(&self) -> Option<&PathBuf> { match &self.file { FileData::Rename(data) => Some(&data.old.filename), _ => None, @@ -214,6 +219,13 @@ impl Event { } } + pub fn get_old_host_path(&self) -> Option<&PathBuf> { + match &self.file { + FileData::Rename(data) => Some(&data.old.host_file), + _ => None, + } + } + /// Set the `host_file` field of the event to the one provided. /// /// In the case of operations that involve two paths, like rename, @@ -238,6 +250,25 @@ impl Event { } } + pub fn get_monitored(&self) -> monitored_t { + match &self.file { + FileData::Open(data) => data.monitored, + FileData::Creation(data) => data.monitored, + FileData::MkDir(data) => data.monitored, + FileData::Unlink(data) => data.monitored, + FileData::Chmod(data) => data.inner.monitored, + FileData::Chown(data) => data.inner.monitored, + FileData::Rename(data) => data.new.monitored, + } + } + + pub fn get_old_monitored(&self) -> Option { + match &self.file { + FileData::Rename(data) => Some(data.old.monitored), + _ => None, + } + } + /// Determine if the event should be ignored. /// /// With wildcards, the kernel can only match on the inode and @@ -249,13 +280,19 @@ impl Event { /// /// We also need to check the old values for rename events. pub fn is_ignored(&self, globset: &GlobSet) -> bool { - self.get_inode().empty() - && self.get_old_inode().is_none_or(|inode| inode.empty()) + self.get_monitored() != monitored_t::MONITORED_BY_INODE + && self + .get_old_monitored() + .is_none_or(|m| m != monitored_t::MONITORED_BY_INODE) && !globset.is_match(self.get_filename()) && self .get_old_filename() .is_none_or(|path| !globset.is_match(path)) } + + pub fn is_monitored_by_parent(&self) -> bool { + self.get_monitored() == monitored_t::MONITORED_BY_PARENT + } } impl TryFrom<&event_t> for Event { @@ -269,6 +306,7 @@ impl TryFrom<&event_t> for Event { value.filename, value.inode, value.parent_inode, + value.monitored, value.__bindgen_anon_1, )?; @@ -320,9 +358,10 @@ impl FileData { filename: [c_char; PATH_MAX as usize], inode: inode_key_t, parent_inode: inode_key_t, + monitored: monitored_t, extra_data: fact_ebpf::event_t__bindgen_ty_1, ) -> anyhow::Result { - let inner = BaseFileData::new(filename, inode, parent_inode)?; + let inner = BaseFileData::new(filename, inode, parent_inode, monitored)?; let file = match event_type { file_activity_type_t::FILE_ACTIVITY_OPEN => FileData::Open(inner), file_activity_type_t::FILE_ACTIVITY_CREATION => FileData::Creation(inner), @@ -347,11 +386,17 @@ impl FileData { FileData::Chown(data) } file_activity_type_t::FILE_ACTIVITY_RENAME => { - let old_filename = unsafe { extra_data.rename.old_filename }; - let old_inode = unsafe { extra_data.rename.old_inode }; + let old_filename = unsafe { extra_data.rename.filename }; + let old_inode = unsafe { extra_data.rename.inode }; + let old_monitored = unsafe { extra_data.rename.monitored }; let data = RenameFileData { new: inner, - old: BaseFileData::new(old_filename, old_inode, Default::default())?, + old: BaseFileData::new( + old_filename, + old_inode, + Default::default(), + old_monitored, + )?, }; FileData::Rename(data) } @@ -425,6 +470,7 @@ pub struct BaseFileData { host_file: PathBuf, inode: inode_key_t, parent_inode: inode_key_t, + monitored: monitored_t, } impl BaseFileData { @@ -432,12 +478,14 @@ impl BaseFileData { filename: [c_char; PATH_MAX as usize], inode: inode_key_t, parent_inode: inode_key_t, + monitored: monitored_t, ) -> anyhow::Result { Ok(BaseFileData { filename: sanitize_d_path(&filename), host_file: PathBuf::new(), // this field is set by HostScanner inode, parent_inode, + monitored, }) } } diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index f49c621f..af143e44 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -28,7 +28,8 @@ use std::{ use anyhow::{Context, bail}; use aya::maps::MapData; -use fact_ebpf::{inode_key_t, inode_value_t}; +use fact_ebpf::{inode_key_t, inode_value_t, monitored_t}; +use globset::{Glob, GlobSet, GlobSetBuilder}; use log::{debug, info, warn}; use tokio::{ sync::{Notify, broadcast, mpsc, watch}, @@ -54,6 +55,8 @@ pub struct HostScanner { tx: broadcast::Sender>, metrics: HostScannerMetrics, + + paths_globset: GlobSet, } impl HostScanner { @@ -68,6 +71,7 @@ impl HostScanner { let kernel_inode_map = RefCell::new(bpf.take_inode_map()?); let inode_map = RefCell::new(std::collections::HashMap::new()); let (tx, _) = broadcast::channel(100); + let paths_globset = HostScanner::build_globset(paths.borrow().as_slice())?; let host_scanner = HostScanner { kernel_inode_map, @@ -78,6 +82,7 @@ impl HostScanner { rx, tx, metrics, + paths_globset, }; // Run an initial scan to fill in the inode map @@ -86,6 +91,22 @@ impl HostScanner { Ok(host_scanner) } + fn build_globset(paths: &[PathBuf]) -> anyhow::Result { + let mut builder = GlobSetBuilder::new(); + for p in paths.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + + builder.add( + Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str)) + .unwrap(), + ); + } + Ok(builder.build()?) + } + fn scan(&self) -> anyhow::Result<()> { debug!("Host scan started"); self.metrics.scan_inc(ScanLabels::Scans); @@ -233,6 +254,127 @@ impl HostScanner { self.metrics.scan_inc(ScanLabels::FileRemoved); } + fn handle_rename_event(&self, event: &mut Event) { + match event.get_monitored() { + monitored_t::MONITORED_BY_INODE => { + // This condition means a file is being renamed and taking the + // place of an existing, tracked file. We need to remove the + // inode we are landing on and put the associated host path in + // the old inode. + let mut inode_map = self.inode_map.borrow_mut(); + let Some(path) = inode_map.remove(event.get_inode()) else { + warn!("Old path was not found for inode tracked event"); + return; + }; + let Some(old_inode) = event.get_old_inode() else { + unreachable!("old inode not found for rename event"); + }; + inode_map.insert(*old_inode, path); + } + monitored_t::NOT_MONITORED + if event.get_old_monitored() == Some(monitored_t::MONITORED_BY_INODE) => + { + // We are landing on a path that is not tracked at all, remove + // the entries for the old path from the map + let Some(old_host_path) = event.get_old_host_path() else { + warn!("Rename event did not have old host path for inode tracked item"); + return; + }; + self.inode_map.borrow_mut().retain(|inode, path| { + if !path.starts_with(old_host_path) { + return true; + } + + let _ = self.kernel_inode_map.borrow_mut().remove(inode); + false + }); + } + monitored_t::NOT_MONITORED => { + // The new path is not monitored and the old path is most likely + // matching by path, we don't need to do anything in this case. + } + monitored_t::MONITORED_BY_PARENT if !event.get_inode().empty() => { + // The parent for the target is monitored, but the file itself + // is not. Remove the entry for the old file from the map. + self.inode_map.borrow_mut().remove( + event + .get_old_inode() + .expect("rename event did not have old inode"), + ); + } + monitored_t::MONITORED_BY_PARENT + if event.get_old_monitored() == Some(monitored_t::MONITORED_BY_INODE) => + { + // The target is monitored by parent and we are landing on a + // path that didn't hold anything, we need to figure out the + // host path and check if we should track it. + let mut inode_map = self.inode_map.borrow_mut(); + let Some(new_host_parent) = inode_map.get(event.get_parent_inode()) else { + warn!("Failed to get parent host path"); + return; + }; + let Some(filename) = event.get_filename().file_name() else { + warn!("Failed to get last component from event: {event:#?}"); + return; + }; + let new_host_path = new_host_parent.join(filename); + let Some(old_host_path) = event.get_old_host_path() else { + unreachable!("Rename event did not have an old host path"); + }; + + if self.paths_globset.is_match(&new_host_path) { + // New path needs to be tracked. + // Move all entries for the old host path to the new one + for (_, path) in inode_map.iter_mut() { + if let Ok(suffix) = path.strip_prefix(old_host_path) { + if suffix == Path::new("") { + *path = new_host_path.clone(); + } else { + *path = new_host_path.join(suffix); + } + } + } + + // Add the new host path to the event + event.set_host_path(new_host_path); + } else { + // New path is not tracked, remove old entries + inode_map.retain(|inode, path| { + if !path.starts_with(old_host_path) { + return true; + } + if let Err(e) = self.kernel_inode_map.borrow_mut().remove(inode) { + warn!("Failed to remove inode kernel entry: {e:?}"); + } + false + }); + } + } + monitored_t::MONITORED_BY_PARENT => { + // In this case, the target location might be monitored, but we + // don't have any information of the host path for the old path, + // best we can do is attempt to scan the file system and fix the + // inode maps that way. + if let Err(e) = self.scan() { + warn!("Scan failed: {e:?}"); + } + + // Attempt to update the host path with the old inode + if let Some(old_inode) = event.get_old_inode() + && let Some(path) = self.inode_map.borrow().get(old_inode) + { + event.set_host_path(path.clone()); + } + } + monitored_t::MONITORED_BY_PATH => { + // Nothing to do here, having one side of the rename monitored + // by path means at best the other side is also monitored by + // path, no inode tracking is involved. + } + _ => unreachable!("Invalid monitored value"), + } + } + /// Periodically notify the host scanner main task that a scan needs /// to happen. /// @@ -286,6 +428,11 @@ impl HostScanner { warn!("Failed to handle creation event: {e}"); } + // Skip directory creation events - we track them internally but don't send to sensor + if event.is_mkdir() { + continue; + } + if let Some(host_path) = self.get_host_path(Some(event.get_inode())) { self.metrics.scan_inc(ScanLabels::InodeHit); event.set_host_path(host_path); @@ -301,17 +448,31 @@ impl HostScanner { self.handle_unlink_event(&event); } - // Skip directory creation events - we track them internally but don't send to sensor - if !event.is_mkdir() { - let event = Arc::new(event); - if let Err(e) = self.tx.send(event) { - self.metrics.events.dropped(); - warn!("Failed to send event: {e}"); - } + if event.is_rename() { self.handle_rename_event(&mut event); } + + if event.is_monitored_by_parent() && + !self.paths_globset.is_match(event.get_host_path()) { + // The event was monitored by parent, but the host + // path is not to be monitored, so we ignore the + // event and attempt to remove the inode from the + // maps to prevent it from sending more events. + self.inode_map.borrow_mut().remove(event.get_inode()); + let _ = self.kernel_inode_map.borrow_mut().remove(event.get_inode()); + self.metrics.events.ignored(); + continue; + } + + let event = Arc::new(event); + if let Err(e) = self.tx.send(event) { + self.metrics.events.dropped(); + warn!("Failed to send event: {e}"); } }, _ = scan_trigger.notified() => self.scan()?, - _ = self.paths.changed() => self.scan()?, + _ = self.paths.changed() => { + self.paths_globset = HostScanner::build_globset(self.paths.borrow().as_slice())?; + self.scan()?; + } _ = self.running.changed() => { if !*self.running.borrow() { break; diff --git a/fact/src/metrics/host_scanner.rs b/fact/src/metrics/host_scanner.rs index f5197cf5..81bef60c 100644 --- a/fact/src/metrics/host_scanner.rs +++ b/fact/src/metrics/host_scanner.rs @@ -33,7 +33,12 @@ pub struct HostScannerMetrics { impl HostScannerMetrics { pub(super) fn new() -> Self { - let labels = [EventLabels::Total, EventLabels::Added, EventLabels::Dropped]; + let labels = [ + EventLabels::Total, + EventLabels::Added, + EventLabels::Dropped, + EventLabels::Ignored, + ]; let events = EventCounter::new( "host_scanner_events", "Events processed by the host scanner component", diff --git a/fact/src/metrics/mod.rs b/fact/src/metrics/mod.rs index 97213c83..c7a7cd73 100644 --- a/fact/src/metrics/mod.rs +++ b/fact/src/metrics/mod.rs @@ -100,6 +100,19 @@ impl EventCounter { .unwrap() .inc_by(n); } + + /// Increment the counter for the Ignored label. + /// + /// Panics if the counter did not add the Ignored label as part of + /// its creation step. + pub fn ignored(&self) { + self.counter + .get(&MetricEvents { + label: LabelValues::Ignored, + }) + .expect("Ignored label not found") + .inc(); + } } #[derive(Debug, Clone)] diff --git a/tests/test_path_rename.py b/tests/test_path_rename.py index 98c3447a..6942c3ea 100644 --- a/tests/test_path_rename.py +++ b/tests/test_path_rename.py @@ -28,7 +28,6 @@ def test_rename(monitored_dir, server, filename): fut = join_path_with_filename(monitored_dir, filename) old_fut = os.path.join(monitored_dir, 'file.txt') - # Create a new file, it will have an empty host_path. with open(old_fut, 'w') as f: f.write('This is a test') os.rename(old_fut, fut) @@ -37,23 +36,15 @@ def test_rename(monitored_dir, server, filename): # Convert fut to string for the Event, replacing invalid UTF-8 with U+FFFD fut = path_to_string(fut) - # TODO: Current behavior is incorrect. The inode map should be updated - # during rename events so that host_path reflects the new path. - # Expected correct behavior: - # - First rename: host_path should be `fut` (new path), old_host_path should be `old_fut` - # - Second rename: host_path should be `old_fut`, old_host_path should be `fut` - # Current behavior: host_path remains the original path (old_fut) because - # the inode map is not updated on rename events. old_host_path is empty. - events = [ - Event(process=Process.from_proc(), event_type=EventType.CREATION, + p = Process.from_proc() + server.wait_events([ + Event(process=p, event_type=EventType.CREATION, file=old_fut, host_path=old_fut), - Event(process=Process.from_proc(), event_type=EventType.RENAME, - file=fut, host_path='', old_file=old_fut, old_host_path=old_fut), - Event(process=Process.from_proc(), event_type=EventType.RENAME, - file=old_fut, host_path='', old_file=fut, old_host_path=old_fut), - ] - - server.wait_events(events) + Event(process=p, event_type=EventType.RENAME, file=fut, + host_path=fut, old_file=old_fut, old_host_path=old_fut), + Event(process=p, event_type=EventType.RENAME, file=old_fut, + host_path=old_fut, old_file=fut, old_host_path=fut), + ]) def test_ignored(monitored_dir, ignored_dir, server): @@ -67,6 +58,7 @@ def test_ignored(monitored_dir, ignored_dir, server): server: The server instance to communicate with. """ ignored_path = os.path.join(ignored_dir, 'test.txt') + p = Process.from_proc() with open(ignored_path, 'w') as f: f.write('This is to be ignored') @@ -75,26 +67,22 @@ def test_ignored(monitored_dir, ignored_dir, server): # Renaming in between ignored paths should not generate events os.rename(ignored_path, new_ignored_path) - # Renaming to a monitored path generates an event + # Renaming to a monitored path requires a scan, we need to wait for + # it before we can continue modifying the FS new_path = os.path.join(monitored_dir, 'rename.txt') os.rename(new_ignored_path, new_path) + server.wait_events([ + Event(process=p, event_type=EventType.RENAME, + file=new_path, host_path=new_path, old_file=new_ignored_path, old_host_path=''), + ]) # Renaming from a monitored path generates an event too os.rename(new_path, ignored_path) - p = Process.from_proc() - # TODO: Current behavior is incorrect for rename events. - # Expected: When renaming from ignored to monitored, host_path should be new_path. - # When renaming from monitored to ignored, old_host_path should be new_path. - # Current: The inode map is not updated on renames, and old_host_path is not populated. - events = [ - Event(process=p, event_type=EventType.RENAME, - file=new_path, host_path='', old_file=new_ignored_path, old_host_path=''), + server.wait_events([ Event(process=p, event_type=EventType.RENAME, - file=ignored_path, host_path='', old_file=new_path, old_host_path=''), - ] - - server.wait_events(events) + file=ignored_path, host_path='', old_file=new_path, old_host_path=new_path), + ]) def test_rename_dir(monitored_dir, ignored_dir, server): @@ -127,24 +115,26 @@ def test_rename_dir(monitored_dir, ignored_dir, server): # This rename should generate no events os.rename(ignored_dut, new_ignored_dut) - # The next three renames need to generate one event each + # Going from a non-monitored directory to a monitored one requires a scan of + # the filesystem to add any subdirectories and files, so we need to wait for + # it to end before we can continue modifying the FS. os.rename(new_ignored_dut, dut) + + p = Process.from_proc() + server.wait_events([ + Event(process=p, event_type=EventType.RENAME, file=dut, + host_path=dut, old_file=new_ignored_dut, old_host_path=''), + ]) + + # The following two event should produce full events without scanning the FS. os.rename(dut, new_dut) os.rename(new_dut, ignored_dut) - p = Process.from_proc() - # TODO: Current behavior is incorrect for rename events. - # Expected: host_path should reflect the new path after rename, - # old_host_path should reflect the old path if it was monitored. - # Current: The inode map is not updated on renames, so host_path remains empty - # or shows the wrong path. old_host_path is not populated. events = [ - Event(process=p, event_type=EventType.RENAME, file=dut, - host_path='', old_file=new_ignored_dut, old_host_path=''), Event(process=p, event_type=EventType.RENAME, - file=new_dut, host_path='', old_file=dut, old_host_path=''), + file=new_dut, host_path=new_dut, old_file=dut, old_host_path=dut), Event(process=p, event_type=EventType.RENAME, - file=ignored_dut, host_path='', old_file=new_dut, old_host_path=''), + file=ignored_dut, host_path='', old_file=new_dut, old_host_path=new_dut), ] server.wait_events(events) From c115b4ddb21f1dfd08b9a606910348dc27731e71 Mon Sep 17 00:00:00 2001 From: Mauro Ezequiel Moltrasio Date: Fri, 10 Apr 2026 12:04:32 +0200 Subject: [PATCH 2/2] ROX-33216: add more test cases for path_rename In order to validate more of the inode tracking functionality in path_rename more test cases and additional validations are added. --- tests/test_path_rename.py | 161 +++++++++++++++++++++++++++++++++++--- 1 file changed, 152 insertions(+), 9 deletions(-) diff --git a/tests/test_path_rename.py b/tests/test_path_rename.py index 6942c3ea..371f870b 100644 --- a/tests/test_path_rename.py +++ b/tests/test_path_rename.py @@ -101,6 +101,20 @@ def test_rename_dir(monitored_dir, ignored_dir, server): ignored_dir: Temporary directory path that is not monitored by fact. server: The server instance to communicate with. """ + def touch_test_files(directory, process=None) -> list[Event]: + events = [] + for i in range(3): + with open(os.path.join(directory, f'{i}.txt'), 'w') as f: + f.write('This is a test') + if process is not None: + events.append( + Event(process=process, + event_type=EventType.OPEN, + file=os.path.join(directory, f'{i}.txt'), + host_path=os.path.join(directory, f'{i}.txt')) + ) + return events + # Directory Under Test dut = os.path.join(monitored_dir, 'some-dir') new_dut = os.path.join(monitored_dir, 'other-dir') @@ -108,9 +122,7 @@ def test_rename_dir(monitored_dir, ignored_dir, server): new_ignored_dut = os.path.join(ignored_dir, 'other-dir') os.mkdir(ignored_dut) - for i in range(3): - with open(os.path.join(ignored_dut, f'{i}.txt'), 'w') as f: - f.write('This is a test') + touch_test_files(ignored_dut) # This rename should generate no events os.rename(ignored_dut, new_ignored_dut) @@ -126,16 +138,81 @@ def test_rename_dir(monitored_dir, ignored_dir, server): host_path=dut, old_file=new_ignored_dut, old_host_path=''), ]) - # The following two event should produce full events without scanning the FS. + events = touch_test_files(dut, p) + + # The following renames should produce full events without scanning the FS. os.rename(dut, new_dut) - os.rename(new_dut, ignored_dut) + events.extend([ + Event(process=p, event_type=EventType.RENAME, file=new_dut, + host_path=new_dut, old_file=dut, old_host_path=dut), + # Check the renamed subfiles are properly tracked + *touch_test_files(new_dut, p), + ]) - events = [ - Event(process=p, event_type=EventType.RENAME, - file=new_dut, host_path=new_dut, old_file=dut, old_host_path=dut), + os.rename(new_dut, ignored_dut) + events.append( Event(process=p, event_type=EventType.RENAME, file=ignored_dut, host_path='', old_file=new_dut, old_host_path=new_dut), - ] + ) + + server.wait_events(events) + + +@pytest.mark.parametrize('from_monitored,to_monitored', [ + pytest.param(True, True, id='both_monitored'), + pytest.param(False, True, id='target_monitored'), + pytest.param(True, False, id='bullet_monitored'), +]) +def test_rename_overwrite(from_monitored, to_monitored, monitored_dir, ignored_dir, server): + events = [] + p = Process.from_proc() + if from_monitored: + bullet = os.path.join(monitored_dir, 'bullet.txt') + events.append( + Event(process=p, + event_type=EventType.CREATION, + file=bullet, + host_path=bullet, + )) + else: + bullet = os.path.join(ignored_dir, 'bullet.txt') + + if to_monitored: + target = os.path.join(monitored_dir, 'target.txt') + events.append( + Event(process=p, + event_type=EventType.CREATION, + file=target, + host_path=target, + )) + else: + target = os.path.join(ignored_dir, 'target.txt') + + # Create both files in the order they are expected in `events` + for path in [bullet, target]: + with open(path, 'w') as f: + f.write('This is a test') + + os.rename(bullet, target) + events.append( + Event(process=p, + event_type=EventType.RENAME, + file=target, + host_path=target if to_monitored else '', + old_file=bullet, + old_host_path=bullet if from_monitored else '', + )) + + if to_monitored: + # One final event to check the mapping is persisted correctly + with open(target, 'w') as f: + f.write('Check mapping') + events.append( + Event(process=p, + event_type=EventType.OPEN, + file=target, + host_path=target, + )) server.wait_events(events) @@ -203,3 +280,69 @@ def test_mounted_dir(test_container, ignored_dir, server): ] server.wait_events(events) + + +def test_cross_mountpoints(test_container, monitored_dir, server): + """ + Attempt to rename files/directories across mountpoints + + This test does not necessarily fit here, since it won't trigger the + path_rename LSM hook, but the test ensures that this hook is + defintely not triggered when an inode crosses a mount point, so it + doesn't fit but it kind of does? ¯\\_(ツ)_/¯ + """ + mounted_file = '/unmonitored/test.txt' + host_path = os.path.join(monitored_dir, 'test.txt') + ovfs_file = '/container-dir/test.txt' + + test_container.exec_run(f'touch {mounted_file}') + # Get owner uid and gid before moving the file + stat = os.stat(host_path) + owner_uid = stat.st_uid + owner_gid = stat.st_gid + mode = stat.st_mode + + test_container.exec_run(f'mv {mounted_file} {ovfs_file}') + test_container.exec_run(f'mv {ovfs_file} {mounted_file}') + + touch = Process.in_container( + exe_path='/usr/bin/touch', + args=f'touch {mounted_file}', + name='touch', + container_id=test_container.id[:12], + ) + first_rename = Process.in_container( + exe_path='/usr/bin/mv', + args=f'mv {mounted_file} {ovfs_file}', + name='mv', + container_id=test_container.id[:12], + ) + second_rename = Process.in_container( + exe_path='/usr/bin/mv', + args=f'mv {ovfs_file} {mounted_file}', + name='mv', + container_id=test_container.id[:12], + ) + + server.wait_events([ + Event(process=touch, event_type=EventType.OPEN, + file=mounted_file, host_path=host_path), + Event(process=first_rename, event_type=EventType.CREATION, + file=ovfs_file, host_path=''), + Event(process=first_rename, event_type=EventType.OPEN, + file=ovfs_file, host_path=''), + Event(process=first_rename, event_type=EventType.OWNERSHIP, + file=ovfs_file, host_path='', owner_uid=owner_uid, owner_gid=owner_gid), + Event(process=first_rename, event_type=EventType.PERMISSION, + file=ovfs_file, host_path='', mode=mode), + Event(process=first_rename, event_type=EventType.UNLINK, + file=mounted_file, host_path=host_path), + Event(process=second_rename, event_type=EventType.CREATION, + file=mounted_file, host_path=host_path), + Event(process=second_rename, event_type=EventType.OWNERSHIP, + file=mounted_file, host_path=host_path, owner_uid=owner_uid, owner_gid=owner_gid), + Event(process=second_rename, event_type=EventType.PERMISSION, + file=mounted_file, host_path=host_path, mode=mode), + Event(process=second_rename, event_type=EventType.UNLINK, + file=ovfs_file, host_path=''), + ])