diff --git a/fact-ebpf/src/bpf/events.h b/fact-ebpf/src/bpf/events.h index 276a1d97..a3d30ea9 100644 --- a/fact-ebpf/src/bpf/events.h +++ b/fact-ebpf/src/bpf/events.h @@ -18,6 +18,7 @@ struct submit_event_args_t { const char* filename; inode_key_t inode; inode_key_t parent_inode; + monitored_t monitored; }; __always_inline static bool reserve_event(struct submit_event_args_t* args) { @@ -33,6 +34,7 @@ __always_inline static void __submit_event(struct submit_event_args_t* args, bool use_bpf_d_path) { struct event_t* event = args->event; event->timestamp = bpf_ktime_get_boot_ns(); + event->monitored = args->monitored; inode_copy(&event->inode, &args->inode); inode_copy(&event->parent_inode, &args->parent_inode); bpf_probe_read_str(event->filename, PATH_MAX, args->filename); @@ -110,14 +112,16 @@ __always_inline static void submit_ownership_event(struct submit_event_args_t* a __always_inline static void submit_rename_event(struct submit_event_args_t* args, const char old_filename[PATH_MAX], - inode_key_t* old_inode) { + inode_key_t* old_inode, + monitored_t old_monitored) { if (!reserve_event(args)) { return; } args->event->type = FILE_ACTIVITY_RENAME; - bpf_probe_read_str(args->event->rename.old_filename, PATH_MAX, old_filename); - inode_copy(&args->event->rename.old_inode, old_inode); + bpf_probe_read_str(args->event->rename.filename, PATH_MAX, old_filename); + inode_copy(&args->event->rename.inode, old_inode); + args->event->rename.monitored = old_monitored; __submit_event(args, path_hooks_support_bpf_d_path); } diff --git a/fact-ebpf/src/bpf/file.h b/fact-ebpf/src/bpf/file.h index efdccfc5..942ecbdf 100644 --- a/fact-ebpf/src/bpf/file.h +++ b/fact-ebpf/src/bpf/file.h @@ -27,18 +27,17 @@ __always_inline static bool path_is_monitored(struct bound_path_t* path) { return res; } -__always_inline static inode_monitored_t is_monitored(inode_key_t* inode, struct bound_path_t* path, const inode_key_t* parent) { +__always_inline static monitored_t is_monitored(const inode_key_t* inode, struct bound_path_t* path, const inode_key_t* parent) { const inode_value_t* volatile inode_value = inode_get(inode); const inode_value_t* volatile parent_value = inode_get(parent); - inode_monitored_t status = inode_is_monitored(inode_value, parent_value); + monitored_t status = inode_is_monitored(inode_value, parent_value); if (status != NOT_MONITORED) { return status; } - inode_reset(inode); if (path_is_monitored(path)) { - return MONITORED; + return MONITORED_BY_PATH; } return NOT_MONITORED; @@ -46,15 +45,15 @@ __always_inline static inode_monitored_t is_monitored(inode_key_t* inode, struct // Check if a new directory should be tracked based on its parent and path. // This is used during mkdir operations where the child inode doesn't exist yet. -__always_inline static inode_monitored_t should_track_mkdir(inode_key_t parent_inode, struct bound_path_t* child_path) { +__always_inline static monitored_t should_track_mkdir(inode_key_t parent_inode, struct bound_path_t* child_path) { const inode_value_t* volatile parent_value = inode_get(&parent_inode); if (parent_value != NULL) { - return PARENT_MONITORED; + return MONITORED_BY_PARENT; } if (path_is_monitored(child_path)) { - return MONITORED; + return MONITORED_BY_PATH; } return NOT_MONITORED; diff --git a/fact-ebpf/src/bpf/inode.h b/fact-ebpf/src/bpf/inode.h index 6b213c3b..01f01814 100644 --- a/fact-ebpf/src/bpf/inode.h +++ b/fact-ebpf/src/bpf/inode.h @@ -85,20 +85,21 @@ __always_inline static void inode_reset(struct inode_key_t* inode) { inode->dev = 0; } -typedef enum inode_monitored_t { - NOT_MONITORED = 0, - MONITORED, - PARENT_MONITORED, -} inode_monitored_t; +/* + * Check if the supplied inode is empty + */ +__always_inline static bool inode_is_empty(struct inode_key_t* inode) { + return inode->inode == 0 && inode->dev == 0; +} // Check if the provided inode or its parent is being monitored. -__always_inline static inode_monitored_t inode_is_monitored(const inode_value_t* volatile inode, const inode_value_t* volatile parent_inode) { +__always_inline static monitored_t inode_is_monitored(const inode_value_t* volatile inode, const inode_value_t* volatile parent_inode) { if (inode != NULL) { - return MONITORED; + return MONITORED_BY_INODE; } if (parent_inode != NULL) { - return PARENT_MONITORED; + return MONITORED_BY_PARENT; } return NOT_MONITORED; diff --git a/fact-ebpf/src/bpf/main.c b/fact-ebpf/src/bpf/main.c index c6effe8d..258aba0c 100644 --- a/fact-ebpf/src/bpf/main.c +++ b/fact-ebpf/src/bpf/main.c @@ -52,14 +52,13 @@ int BPF_PROG(trace_file_open, struct file* file) { struct inode* parent_inode_ptr = parent_dentry ? BPF_CORE_READ(parent_dentry, d_inode) : NULL; args.parent_inode = inode_to_key(parent_inode_ptr); - inode_monitored_t status = is_monitored(&args.inode, path, &args.parent_inode); - - if (status == PARENT_MONITORED && event_type == FILE_ACTIVITY_CREATION) { - inode_add(&args.inode); + args.monitored = is_monitored(&args.inode, path, &args.parent_inode); + if (args.monitored == NOT_MONITORED) { + goto ignored; } - if (status == NOT_MONITORED) { - goto ignored; + if (args.monitored == MONITORED_BY_PARENT && event_type == FILE_ACTIVITY_CREATION) { + inode_add(&args.inode); } submit_open_event(&args, event_type); @@ -90,8 +89,9 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) { args.filename = path->path; args.inode = inode_to_key(dentry->d_inode); + args.monitored = is_monitored(&args.inode, path, NULL); - if (is_monitored(&args.inode, path, NULL) == NOT_MONITORED) { + if (args.monitored == NOT_MONITORED) { m->path_unlink.ignored++; return 0; } @@ -122,8 +122,9 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) { args.filename = bound_path->path; args.inode = inode_to_key(path->dentry->d_inode); + args.monitored = is_monitored(&args.inode, bound_path, NULL); - if (is_monitored(&args.inode, bound_path, NULL) == NOT_MONITORED) { + if (args.monitored == NOT_MONITORED) { args.metrics->ignored++; return 0; } @@ -156,8 +157,9 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign args.filename = bound_path->path; args.inode = inode_to_key(path->dentry->d_inode); + args.monitored = is_monitored(&args.inode, bound_path, NULL); - if (is_monitored(&args.inode, bound_path, NULL) == NOT_MONITORED) { + if (args.monitored == NOT_MONITORED) { args.metrics->ignored++; return 0; } @@ -197,18 +199,75 @@ int BPF_PROG(trace_path_rename, struct path* old_dir, } args.inode = inode_to_key(new_dentry->d_inode); + args.parent_inode = inode_to_key(new_dir->dentry->d_inode); + args.monitored = is_monitored(&args.inode, new_path, &args.parent_inode); inode_key_t old_inode = inode_to_key(old_dentry->d_inode); - - inode_monitored_t old_monitored = is_monitored(&old_inode, old_path, NULL); - inode_monitored_t new_monitored = is_monitored(&args.inode, new_path, NULL); - - if (old_monitored == NOT_MONITORED && new_monitored == NOT_MONITORED) { - args.metrics->ignored++; - return 0; + monitored_t old_monitored = is_monitored(&old_inode, old_path, NULL); + + // From this point on we need to handle inode tracking. + // + // The result will be a combination of whether we are already tracking + // the old inode or not and whether the target path has an existing + // object that is about to be overwritten and if said object is + // tracked by inode or not. + switch (args.monitored) { + case NOT_MONITORED: + if (old_monitored == NOT_MONITORED) { + m->path_rename.ignored++; + return 0; + } + + if (old_monitored == MONITORED_BY_INODE) { + // Old inode is monitored, new path is not. + // If the old path is a directory userspace will remove any + // subdirectories and files too. + inode_remove(&old_inode); + } + break; + + case MONITORED_BY_PATH: + if (old_monitored == MONITORED_BY_INODE) { + // New path is not inode tracked, old path is. + // + // This implies the inode will be crossing a FS mountpoint, + // which should never happen. When the inode crosses into a new + // mount, a new inode is created altogether. Still, we can cover + // our bases. + inode_remove(&old_inode); + } + break; + + case MONITORED_BY_PARENT: + if (old_monitored != MONITORED_BY_INODE) { + // Old inode is not monitored, new parent is. + if (inode_is_empty(&args.inode)) { + // Landing on an empty path, we track the inode in case we + // need to, userspace will double check in detail. + inode_add(&old_inode); + } + } else if (!inode_is_empty(&args.inode)) { + // Old inode is monitored and will land on a path that has a + // monitored parent but the path itself is not monitored, we + // stop tracking the inode + inode_remove(&old_inode); + } + break; + + case MONITORED_BY_INODE: + // If we landed here, the new path already has an inode that is + // being tracked and is about to be overwritten, we need to remove + // it from the map + inode_remove(&args.inode); + if (old_monitored != MONITORED_BY_INODE) { + // Old inode is not monitored, but is landing in a monitored + // path that uses inode tracking. + inode_add(&old_inode); + } + break; } - submit_rename_event(&args, old_path->path, &old_inode); + submit_rename_event(&args, old_path->path, &old_inode, old_monitored); return 0; error: @@ -235,7 +294,8 @@ int BPF_PROG(trace_path_mkdir, struct path* dir, struct dentry* dentry, umode_t struct inode* parent_inode_ptr = BPF_CORE_READ(dir, dentry, d_inode); inode_key_t parent_inode = inode_to_key(parent_inode_ptr); - if (should_track_mkdir(parent_inode, path) != PARENT_MONITORED) { + monitored_t monitored = should_track_mkdir(parent_inode, path); + if (monitored != MONITORED_BY_PARENT) { m->path_mkdir.ignored++; return 0; } @@ -266,6 +326,7 @@ int BPF_PROG(trace_path_mkdir, struct path* dir, struct dentry* dentry, umode_t return 0; } mkdir_ctx->parent_inode = parent_inode; + mkdir_ctx->monitored = monitored; return 0; } @@ -295,6 +356,7 @@ int BPF_PROG(trace_d_instantiate, struct dentry* dentry, struct inode* inode) { } args.filename = mkdir_ctx->path; args.parent_inode = mkdir_ctx->parent_inode; + args.monitored = mkdir_ctx->monitored; args.inode = inode_to_key(inode); diff --git a/fact-ebpf/src/bpf/types.h b/fact-ebpf/src/bpf/types.h index 95c67e86..67d8df49 100644 --- a/fact-ebpf/src/bpf/types.h +++ b/fact-ebpf/src/bpf/types.h @@ -42,6 +42,13 @@ typedef struct inode_key_t { unsigned long dev; } inode_key_t; +typedef enum monitored_t { + NOT_MONITORED = 0, + MONITORED_BY_INODE, + MONITORED_BY_PATH, + MONITORED_BY_PARENT, +} monitored_t; + // We can't use bool here because it is not a standard C type, we would // need to include vmlinux.h but that would explode our Rust bindings. // For the time being we just keep a char. @@ -64,6 +71,7 @@ struct event_t { char filename[PATH_MAX]; inode_key_t inode; inode_key_t parent_inode; + monitored_t monitored; file_activity_type_t type; union { struct { @@ -77,8 +85,9 @@ struct event_t { } old, new; } chown; struct { - char old_filename[PATH_MAX]; - inode_key_t old_inode; + char filename[PATH_MAX]; + inode_key_t inode; + monitored_t monitored; } rename; }; }; @@ -101,6 +110,7 @@ struct path_prefix_t { struct mkdir_context_t { char path[PATH_MAX]; inode_key_t parent_inode; + monitored_t monitored; }; // Metrics types diff --git a/fact-ebpf/src/lib.rs b/fact-ebpf/src/lib.rs index c4c95ec6..5d5984ab 100644 --- a/fact-ebpf/src/lib.rs +++ b/fact-ebpf/src/lib.rs @@ -103,6 +103,27 @@ impl Serialize for inode_key_t { unsafe impl Pod for inode_key_t {} +impl Default for monitored_t { + fn default() -> Self { + monitored_t::NOT_MONITORED + } +} + +impl Serialize for monitored_t { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match *self { + monitored_t::NOT_MONITORED => "not monitored".serialize(serializer), + monitored_t::MONITORED_BY_INODE => "by inode".serialize(serializer), + monitored_t::MONITORED_BY_PATH => "by path".serialize(serializer), + monitored_t::MONITORED_BY_PARENT => "by parent".serialize(serializer), + _ => unreachable!("Invalid monitored_t value: {self:?}"), + } + } +} + impl metrics_by_hook_t { fn accumulate(&self, other: &metrics_by_hook_t) -> metrics_by_hook_t { let mut m = metrics_by_hook_t { ..*self }; diff --git a/fact/src/bpf/mod.rs b/fact/src/bpf/mod.rs index 38fe269d..94fd6a08 100644 --- a/fact/src/bpf/mod.rs +++ b/fact/src/bpf/mod.rs @@ -9,7 +9,7 @@ use aya::{ use checks::Checks; use globset::{Glob, GlobSet, GlobSetBuilder}; use libc::c_char; -use log::{error, info}; +use log::{error, info, warn}; use tokio::{ io::unix::AsyncFd, sync::{mpsc, watch}, @@ -160,7 +160,9 @@ impl Bpf { // Remove old prefixes for p in self.paths.iter().filter(|p| !new_paths.contains(p)) { - path_prefix.remove(&(*p).into())?; + if let Err(e) = path_prefix.remove(&(*p).into()) { + warn!("Failed to remove path prefix: {e:#?}"); + } } self.paths = new_paths; @@ -228,7 +230,12 @@ impl Bpf { let event: &event_t = unsafe { &*(event.as_ptr() as *const _) }; let event = match Event::try_from(event) { Ok(event) => { - if event.is_ignored(&self.paths_globset) { + // If the event is monitored by parent, we need to check + // its host path, but we don't have that context here, + // so we let the event go into HostScanner and make the + // decision there. + if !event.is_monitored_by_parent() && + event.is_ignored(&self.paths_globset) { event_counter.dropped(); continue; } diff --git a/fact/src/event/mod.rs b/fact/src/event/mod.rs index 1854d7af..a5e96c47 100644 --- a/fact/src/event/mod.rs +++ b/fact/src/event/mod.rs @@ -10,7 +10,7 @@ use globset::GlobSet; use log::warn; use serde::Serialize; -use fact_ebpf::{PATH_MAX, event_t, file_activity_type_t, inode_key_t}; +use fact_ebpf::{PATH_MAX, event_t, file_activity_type_t, inode_key_t, monitored_t}; use crate::host_info; use process::Process; @@ -95,6 +95,7 @@ impl Event { host_file, inode: Default::default(), parent_inode: Default::default(), + monitored: Default::default(), }; let file = match data { EventTestData::Creation => FileData::Creation(inner), @@ -143,6 +144,10 @@ impl Event { matches!(self.file, FileData::Unlink(_)) } + pub fn is_rename(&self) -> bool { + matches!(self.file, FileData::Rename(_)) + } + /// Unwrap the inner FileData and return the inode that triggered /// the event. /// @@ -195,7 +200,7 @@ impl Event { } } - fn get_old_filename(&self) -> Option<&PathBuf> { + pub fn get_old_filename(&self) -> Option<&PathBuf> { match &self.file { FileData::Rename(data) => Some(&data.old.filename), _ => None, @@ -214,6 +219,13 @@ impl Event { } } + pub fn get_old_host_path(&self) -> Option<&PathBuf> { + match &self.file { + FileData::Rename(data) => Some(&data.old.host_file), + _ => None, + } + } + /// Set the `host_file` field of the event to the one provided. /// /// In the case of operations that involve two paths, like rename, @@ -238,6 +250,25 @@ impl Event { } } + pub fn get_monitored(&self) -> monitored_t { + match &self.file { + FileData::Open(data) => data.monitored, + FileData::Creation(data) => data.monitored, + FileData::MkDir(data) => data.monitored, + FileData::Unlink(data) => data.monitored, + FileData::Chmod(data) => data.inner.monitored, + FileData::Chown(data) => data.inner.monitored, + FileData::Rename(data) => data.new.monitored, + } + } + + pub fn get_old_monitored(&self) -> Option { + match &self.file { + FileData::Rename(data) => Some(data.old.monitored), + _ => None, + } + } + /// Determine if the event should be ignored. /// /// With wildcards, the kernel can only match on the inode and @@ -249,13 +280,19 @@ impl Event { /// /// We also need to check the old values for rename events. pub fn is_ignored(&self, globset: &GlobSet) -> bool { - self.get_inode().empty() - && self.get_old_inode().is_none_or(|inode| inode.empty()) + self.get_monitored() != monitored_t::MONITORED_BY_INODE + && self + .get_old_monitored() + .is_none_or(|m| m != monitored_t::MONITORED_BY_INODE) && !globset.is_match(self.get_filename()) && self .get_old_filename() .is_none_or(|path| !globset.is_match(path)) } + + pub fn is_monitored_by_parent(&self) -> bool { + self.get_monitored() == monitored_t::MONITORED_BY_PARENT + } } impl TryFrom<&event_t> for Event { @@ -269,6 +306,7 @@ impl TryFrom<&event_t> for Event { value.filename, value.inode, value.parent_inode, + value.monitored, value.__bindgen_anon_1, )?; @@ -320,9 +358,10 @@ impl FileData { filename: [c_char; PATH_MAX as usize], inode: inode_key_t, parent_inode: inode_key_t, + monitored: monitored_t, extra_data: fact_ebpf::event_t__bindgen_ty_1, ) -> anyhow::Result { - let inner = BaseFileData::new(filename, inode, parent_inode)?; + let inner = BaseFileData::new(filename, inode, parent_inode, monitored)?; let file = match event_type { file_activity_type_t::FILE_ACTIVITY_OPEN => FileData::Open(inner), file_activity_type_t::FILE_ACTIVITY_CREATION => FileData::Creation(inner), @@ -347,11 +386,17 @@ impl FileData { FileData::Chown(data) } file_activity_type_t::FILE_ACTIVITY_RENAME => { - let old_filename = unsafe { extra_data.rename.old_filename }; - let old_inode = unsafe { extra_data.rename.old_inode }; + let old_filename = unsafe { extra_data.rename.filename }; + let old_inode = unsafe { extra_data.rename.inode }; + let old_monitored = unsafe { extra_data.rename.monitored }; let data = RenameFileData { new: inner, - old: BaseFileData::new(old_filename, old_inode, Default::default())?, + old: BaseFileData::new( + old_filename, + old_inode, + Default::default(), + old_monitored, + )?, }; FileData::Rename(data) } @@ -425,6 +470,7 @@ pub struct BaseFileData { host_file: PathBuf, inode: inode_key_t, parent_inode: inode_key_t, + monitored: monitored_t, } impl BaseFileData { @@ -432,12 +478,14 @@ impl BaseFileData { filename: [c_char; PATH_MAX as usize], inode: inode_key_t, parent_inode: inode_key_t, + monitored: monitored_t, ) -> anyhow::Result { Ok(BaseFileData { filename: sanitize_d_path(&filename), host_file: PathBuf::new(), // this field is set by HostScanner inode, parent_inode, + monitored, }) } } diff --git a/fact/src/host_scanner.rs b/fact/src/host_scanner.rs index f49c621f..af143e44 100644 --- a/fact/src/host_scanner.rs +++ b/fact/src/host_scanner.rs @@ -28,7 +28,8 @@ use std::{ use anyhow::{Context, bail}; use aya::maps::MapData; -use fact_ebpf::{inode_key_t, inode_value_t}; +use fact_ebpf::{inode_key_t, inode_value_t, monitored_t}; +use globset::{Glob, GlobSet, GlobSetBuilder}; use log::{debug, info, warn}; use tokio::{ sync::{Notify, broadcast, mpsc, watch}, @@ -54,6 +55,8 @@ pub struct HostScanner { tx: broadcast::Sender>, metrics: HostScannerMetrics, + + paths_globset: GlobSet, } impl HostScanner { @@ -68,6 +71,7 @@ impl HostScanner { let kernel_inode_map = RefCell::new(bpf.take_inode_map()?); let inode_map = RefCell::new(std::collections::HashMap::new()); let (tx, _) = broadcast::channel(100); + let paths_globset = HostScanner::build_globset(paths.borrow().as_slice())?; let host_scanner = HostScanner { kernel_inode_map, @@ -78,6 +82,7 @@ impl HostScanner { rx, tx, metrics, + paths_globset, }; // Run an initial scan to fill in the inode map @@ -86,6 +91,22 @@ impl HostScanner { Ok(host_scanner) } + fn build_globset(paths: &[PathBuf]) -> anyhow::Result { + let mut builder = GlobSetBuilder::new(); + for p in paths.iter() { + let Some(glob_str) = p.to_str() else { + bail!("failed to convert path {} to string", p.display()); + }; + + builder.add( + Glob::new(glob_str) + .with_context(|| format!("invalid glob {}", glob_str)) + .unwrap(), + ); + } + Ok(builder.build()?) + } + fn scan(&self) -> anyhow::Result<()> { debug!("Host scan started"); self.metrics.scan_inc(ScanLabels::Scans); @@ -233,6 +254,127 @@ impl HostScanner { self.metrics.scan_inc(ScanLabels::FileRemoved); } + fn handle_rename_event(&self, event: &mut Event) { + match event.get_monitored() { + monitored_t::MONITORED_BY_INODE => { + // This condition means a file is being renamed and taking the + // place of an existing, tracked file. We need to remove the + // inode we are landing on and put the associated host path in + // the old inode. + let mut inode_map = self.inode_map.borrow_mut(); + let Some(path) = inode_map.remove(event.get_inode()) else { + warn!("Old path was not found for inode tracked event"); + return; + }; + let Some(old_inode) = event.get_old_inode() else { + unreachable!("old inode not found for rename event"); + }; + inode_map.insert(*old_inode, path); + } + monitored_t::NOT_MONITORED + if event.get_old_monitored() == Some(monitored_t::MONITORED_BY_INODE) => + { + // We are landing on a path that is not tracked at all, remove + // the entries for the old path from the map + let Some(old_host_path) = event.get_old_host_path() else { + warn!("Rename event did not have old host path for inode tracked item"); + return; + }; + self.inode_map.borrow_mut().retain(|inode, path| { + if !path.starts_with(old_host_path) { + return true; + } + + let _ = self.kernel_inode_map.borrow_mut().remove(inode); + false + }); + } + monitored_t::NOT_MONITORED => { + // The new path is not monitored and the old path is most likely + // matching by path, we don't need to do anything in this case. + } + monitored_t::MONITORED_BY_PARENT if !event.get_inode().empty() => { + // The parent for the target is monitored, but the file itself + // is not. Remove the entry for the old file from the map. + self.inode_map.borrow_mut().remove( + event + .get_old_inode() + .expect("rename event did not have old inode"), + ); + } + monitored_t::MONITORED_BY_PARENT + if event.get_old_monitored() == Some(monitored_t::MONITORED_BY_INODE) => + { + // The target is monitored by parent and we are landing on a + // path that didn't hold anything, we need to figure out the + // host path and check if we should track it. + let mut inode_map = self.inode_map.borrow_mut(); + let Some(new_host_parent) = inode_map.get(event.get_parent_inode()) else { + warn!("Failed to get parent host path"); + return; + }; + let Some(filename) = event.get_filename().file_name() else { + warn!("Failed to get last component from event: {event:#?}"); + return; + }; + let new_host_path = new_host_parent.join(filename); + let Some(old_host_path) = event.get_old_host_path() else { + unreachable!("Rename event did not have an old host path"); + }; + + if self.paths_globset.is_match(&new_host_path) { + // New path needs to be tracked. + // Move all entries for the old host path to the new one + for (_, path) in inode_map.iter_mut() { + if let Ok(suffix) = path.strip_prefix(old_host_path) { + if suffix == Path::new("") { + *path = new_host_path.clone(); + } else { + *path = new_host_path.join(suffix); + } + } + } + + // Add the new host path to the event + event.set_host_path(new_host_path); + } else { + // New path is not tracked, remove old entries + inode_map.retain(|inode, path| { + if !path.starts_with(old_host_path) { + return true; + } + if let Err(e) = self.kernel_inode_map.borrow_mut().remove(inode) { + warn!("Failed to remove inode kernel entry: {e:?}"); + } + false + }); + } + } + monitored_t::MONITORED_BY_PARENT => { + // In this case, the target location might be monitored, but we + // don't have any information of the host path for the old path, + // best we can do is attempt to scan the file system and fix the + // inode maps that way. + if let Err(e) = self.scan() { + warn!("Scan failed: {e:?}"); + } + + // Attempt to update the host path with the old inode + if let Some(old_inode) = event.get_old_inode() + && let Some(path) = self.inode_map.borrow().get(old_inode) + { + event.set_host_path(path.clone()); + } + } + monitored_t::MONITORED_BY_PATH => { + // Nothing to do here, having one side of the rename monitored + // by path means at best the other side is also monitored by + // path, no inode tracking is involved. + } + _ => unreachable!("Invalid monitored value"), + } + } + /// Periodically notify the host scanner main task that a scan needs /// to happen. /// @@ -286,6 +428,11 @@ impl HostScanner { warn!("Failed to handle creation event: {e}"); } + // Skip directory creation events - we track them internally but don't send to sensor + if event.is_mkdir() { + continue; + } + if let Some(host_path) = self.get_host_path(Some(event.get_inode())) { self.metrics.scan_inc(ScanLabels::InodeHit); event.set_host_path(host_path); @@ -301,17 +448,31 @@ impl HostScanner { self.handle_unlink_event(&event); } - // Skip directory creation events - we track them internally but don't send to sensor - if !event.is_mkdir() { - let event = Arc::new(event); - if let Err(e) = self.tx.send(event) { - self.metrics.events.dropped(); - warn!("Failed to send event: {e}"); - } + if event.is_rename() { self.handle_rename_event(&mut event); } + + if event.is_monitored_by_parent() && + !self.paths_globset.is_match(event.get_host_path()) { + // The event was monitored by parent, but the host + // path is not to be monitored, so we ignore the + // event and attempt to remove the inode from the + // maps to prevent it from sending more events. + self.inode_map.borrow_mut().remove(event.get_inode()); + let _ = self.kernel_inode_map.borrow_mut().remove(event.get_inode()); + self.metrics.events.ignored(); + continue; + } + + let event = Arc::new(event); + if let Err(e) = self.tx.send(event) { + self.metrics.events.dropped(); + warn!("Failed to send event: {e}"); } }, _ = scan_trigger.notified() => self.scan()?, - _ = self.paths.changed() => self.scan()?, + _ = self.paths.changed() => { + self.paths_globset = HostScanner::build_globset(self.paths.borrow().as_slice())?; + self.scan()?; + } _ = self.running.changed() => { if !*self.running.borrow() { break; diff --git a/fact/src/metrics/host_scanner.rs b/fact/src/metrics/host_scanner.rs index f5197cf5..81bef60c 100644 --- a/fact/src/metrics/host_scanner.rs +++ b/fact/src/metrics/host_scanner.rs @@ -33,7 +33,12 @@ pub struct HostScannerMetrics { impl HostScannerMetrics { pub(super) fn new() -> Self { - let labels = [EventLabels::Total, EventLabels::Added, EventLabels::Dropped]; + let labels = [ + EventLabels::Total, + EventLabels::Added, + EventLabels::Dropped, + EventLabels::Ignored, + ]; let events = EventCounter::new( "host_scanner_events", "Events processed by the host scanner component", diff --git a/fact/src/metrics/mod.rs b/fact/src/metrics/mod.rs index 97213c83..c7a7cd73 100644 --- a/fact/src/metrics/mod.rs +++ b/fact/src/metrics/mod.rs @@ -100,6 +100,19 @@ impl EventCounter { .unwrap() .inc_by(n); } + + /// Increment the counter for the Ignored label. + /// + /// Panics if the counter did not add the Ignored label as part of + /// its creation step. + pub fn ignored(&self) { + self.counter + .get(&MetricEvents { + label: LabelValues::Ignored, + }) + .expect("Ignored label not found") + .inc(); + } } #[derive(Debug, Clone)] diff --git a/tests/test_path_rename.py b/tests/test_path_rename.py index 98c3447a..371f870b 100644 --- a/tests/test_path_rename.py +++ b/tests/test_path_rename.py @@ -28,7 +28,6 @@ def test_rename(monitored_dir, server, filename): fut = join_path_with_filename(monitored_dir, filename) old_fut = os.path.join(monitored_dir, 'file.txt') - # Create a new file, it will have an empty host_path. with open(old_fut, 'w') as f: f.write('This is a test') os.rename(old_fut, fut) @@ -37,23 +36,15 @@ def test_rename(monitored_dir, server, filename): # Convert fut to string for the Event, replacing invalid UTF-8 with U+FFFD fut = path_to_string(fut) - # TODO: Current behavior is incorrect. The inode map should be updated - # during rename events so that host_path reflects the new path. - # Expected correct behavior: - # - First rename: host_path should be `fut` (new path), old_host_path should be `old_fut` - # - Second rename: host_path should be `old_fut`, old_host_path should be `fut` - # Current behavior: host_path remains the original path (old_fut) because - # the inode map is not updated on rename events. old_host_path is empty. - events = [ - Event(process=Process.from_proc(), event_type=EventType.CREATION, + p = Process.from_proc() + server.wait_events([ + Event(process=p, event_type=EventType.CREATION, file=old_fut, host_path=old_fut), - Event(process=Process.from_proc(), event_type=EventType.RENAME, - file=fut, host_path='', old_file=old_fut, old_host_path=old_fut), - Event(process=Process.from_proc(), event_type=EventType.RENAME, - file=old_fut, host_path='', old_file=fut, old_host_path=old_fut), - ] - - server.wait_events(events) + Event(process=p, event_type=EventType.RENAME, file=fut, + host_path=fut, old_file=old_fut, old_host_path=old_fut), + Event(process=p, event_type=EventType.RENAME, file=old_fut, + host_path=old_fut, old_file=fut, old_host_path=fut), + ]) def test_ignored(monitored_dir, ignored_dir, server): @@ -67,6 +58,7 @@ def test_ignored(monitored_dir, ignored_dir, server): server: The server instance to communicate with. """ ignored_path = os.path.join(ignored_dir, 'test.txt') + p = Process.from_proc() with open(ignored_path, 'w') as f: f.write('This is to be ignored') @@ -75,26 +67,22 @@ def test_ignored(monitored_dir, ignored_dir, server): # Renaming in between ignored paths should not generate events os.rename(ignored_path, new_ignored_path) - # Renaming to a monitored path generates an event + # Renaming to a monitored path requires a scan, we need to wait for + # it before we can continue modifying the FS new_path = os.path.join(monitored_dir, 'rename.txt') os.rename(new_ignored_path, new_path) + server.wait_events([ + Event(process=p, event_type=EventType.RENAME, + file=new_path, host_path=new_path, old_file=new_ignored_path, old_host_path=''), + ]) # Renaming from a monitored path generates an event too os.rename(new_path, ignored_path) - p = Process.from_proc() - # TODO: Current behavior is incorrect for rename events. - # Expected: When renaming from ignored to monitored, host_path should be new_path. - # When renaming from monitored to ignored, old_host_path should be new_path. - # Current: The inode map is not updated on renames, and old_host_path is not populated. - events = [ - Event(process=p, event_type=EventType.RENAME, - file=new_path, host_path='', old_file=new_ignored_path, old_host_path=''), + server.wait_events([ Event(process=p, event_type=EventType.RENAME, - file=ignored_path, host_path='', old_file=new_path, old_host_path=''), - ] - - server.wait_events(events) + file=ignored_path, host_path='', old_file=new_path, old_host_path=new_path), + ]) def test_rename_dir(monitored_dir, ignored_dir, server): @@ -113,6 +101,20 @@ def test_rename_dir(monitored_dir, ignored_dir, server): ignored_dir: Temporary directory path that is not monitored by fact. server: The server instance to communicate with. """ + def touch_test_files(directory, process=None) -> list[Event]: + events = [] + for i in range(3): + with open(os.path.join(directory, f'{i}.txt'), 'w') as f: + f.write('This is a test') + if process is not None: + events.append( + Event(process=process, + event_type=EventType.OPEN, + file=os.path.join(directory, f'{i}.txt'), + host_path=os.path.join(directory, f'{i}.txt')) + ) + return events + # Directory Under Test dut = os.path.join(monitored_dir, 'some-dir') new_dut = os.path.join(monitored_dir, 'other-dir') @@ -120,32 +122,97 @@ def test_rename_dir(monitored_dir, ignored_dir, server): new_ignored_dut = os.path.join(ignored_dir, 'other-dir') os.mkdir(ignored_dut) - for i in range(3): - with open(os.path.join(ignored_dut, f'{i}.txt'), 'w') as f: - f.write('This is a test') + touch_test_files(ignored_dut) # This rename should generate no events os.rename(ignored_dut, new_ignored_dut) - # The next three renames need to generate one event each + # Going from a non-monitored directory to a monitored one requires a scan of + # the filesystem to add any subdirectories and files, so we need to wait for + # it to end before we can continue modifying the FS. os.rename(new_ignored_dut, dut) - os.rename(dut, new_dut) - os.rename(new_dut, ignored_dut) p = Process.from_proc() - # TODO: Current behavior is incorrect for rename events. - # Expected: host_path should reflect the new path after rename, - # old_host_path should reflect the old path if it was monitored. - # Current: The inode map is not updated on renames, so host_path remains empty - # or shows the wrong path. old_host_path is not populated. - events = [ + server.wait_events([ Event(process=p, event_type=EventType.RENAME, file=dut, - host_path='', old_file=new_ignored_dut, old_host_path=''), - Event(process=p, event_type=EventType.RENAME, - file=new_dut, host_path='', old_file=dut, old_host_path=''), + host_path=dut, old_file=new_ignored_dut, old_host_path=''), + ]) + + events = touch_test_files(dut, p) + + # The following renames should produce full events without scanning the FS. + os.rename(dut, new_dut) + events.extend([ + Event(process=p, event_type=EventType.RENAME, file=new_dut, + host_path=new_dut, old_file=dut, old_host_path=dut), + # Check the renamed subfiles are properly tracked + *touch_test_files(new_dut, p), + ]) + + os.rename(new_dut, ignored_dut) + events.append( Event(process=p, event_type=EventType.RENAME, - file=ignored_dut, host_path='', old_file=new_dut, old_host_path=''), - ] + file=ignored_dut, host_path='', old_file=new_dut, old_host_path=new_dut), + ) + + server.wait_events(events) + + +@pytest.mark.parametrize('from_monitored,to_monitored', [ + pytest.param(True, True, id='both_monitored'), + pytest.param(False, True, id='target_monitored'), + pytest.param(True, False, id='bullet_monitored'), +]) +def test_rename_overwrite(from_monitored, to_monitored, monitored_dir, ignored_dir, server): + events = [] + p = Process.from_proc() + if from_monitored: + bullet = os.path.join(monitored_dir, 'bullet.txt') + events.append( + Event(process=p, + event_type=EventType.CREATION, + file=bullet, + host_path=bullet, + )) + else: + bullet = os.path.join(ignored_dir, 'bullet.txt') + + if to_monitored: + target = os.path.join(monitored_dir, 'target.txt') + events.append( + Event(process=p, + event_type=EventType.CREATION, + file=target, + host_path=target, + )) + else: + target = os.path.join(ignored_dir, 'target.txt') + + # Create both files in the order they are expected in `events` + for path in [bullet, target]: + with open(path, 'w') as f: + f.write('This is a test') + + os.rename(bullet, target) + events.append( + Event(process=p, + event_type=EventType.RENAME, + file=target, + host_path=target if to_monitored else '', + old_file=bullet, + old_host_path=bullet if from_monitored else '', + )) + + if to_monitored: + # One final event to check the mapping is persisted correctly + with open(target, 'w') as f: + f.write('Check mapping') + events.append( + Event(process=p, + event_type=EventType.OPEN, + file=target, + host_path=target, + )) server.wait_events(events) @@ -213,3 +280,69 @@ def test_mounted_dir(test_container, ignored_dir, server): ] server.wait_events(events) + + +def test_cross_mountpoints(test_container, monitored_dir, server): + """ + Attempt to rename files/directories across mountpoints + + This test does not necessarily fit here, since it won't trigger the + path_rename LSM hook, but the test ensures that this hook is + defintely not triggered when an inode crosses a mount point, so it + doesn't fit but it kind of does? ¯\\_(ツ)_/¯ + """ + mounted_file = '/unmonitored/test.txt' + host_path = os.path.join(monitored_dir, 'test.txt') + ovfs_file = '/container-dir/test.txt' + + test_container.exec_run(f'touch {mounted_file}') + # Get owner uid and gid before moving the file + stat = os.stat(host_path) + owner_uid = stat.st_uid + owner_gid = stat.st_gid + mode = stat.st_mode + + test_container.exec_run(f'mv {mounted_file} {ovfs_file}') + test_container.exec_run(f'mv {ovfs_file} {mounted_file}') + + touch = Process.in_container( + exe_path='/usr/bin/touch', + args=f'touch {mounted_file}', + name='touch', + container_id=test_container.id[:12], + ) + first_rename = Process.in_container( + exe_path='/usr/bin/mv', + args=f'mv {mounted_file} {ovfs_file}', + name='mv', + container_id=test_container.id[:12], + ) + second_rename = Process.in_container( + exe_path='/usr/bin/mv', + args=f'mv {ovfs_file} {mounted_file}', + name='mv', + container_id=test_container.id[:12], + ) + + server.wait_events([ + Event(process=touch, event_type=EventType.OPEN, + file=mounted_file, host_path=host_path), + Event(process=first_rename, event_type=EventType.CREATION, + file=ovfs_file, host_path=''), + Event(process=first_rename, event_type=EventType.OPEN, + file=ovfs_file, host_path=''), + Event(process=first_rename, event_type=EventType.OWNERSHIP, + file=ovfs_file, host_path='', owner_uid=owner_uid, owner_gid=owner_gid), + Event(process=first_rename, event_type=EventType.PERMISSION, + file=ovfs_file, host_path='', mode=mode), + Event(process=first_rename, event_type=EventType.UNLINK, + file=mounted_file, host_path=host_path), + Event(process=second_rename, event_type=EventType.CREATION, + file=mounted_file, host_path=host_path), + Event(process=second_rename, event_type=EventType.OWNERSHIP, + file=mounted_file, host_path=host_path, owner_uid=owner_uid, owner_gid=owner_gid), + Event(process=second_rename, event_type=EventType.PERMISSION, + file=mounted_file, host_path=host_path, mode=mode), + Event(process=second_rename, event_type=EventType.UNLINK, + file=ovfs_file, host_path=''), + ])