@@ -14,6 +14,40 @@ pub enum ScanRule {
1414 AllowedExecutablesOnly ( Vec < String > ) ,
1515}
1616
/// Resource limits for skill scanning to prevent runaway resource consumption.
///
/// Start from [`ScanLimits::default`] and override individual fields as needed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanLimits {
    /// Maximum size, in bytes, of a file that will be read (default: 1 MiB).
    ///
    /// Files strictly larger than this are skipped and reported instead of scanned.
    pub max_file_size: u64,
    /// Maximum number of files to scan (default: 1000).
    ///
    /// Skipped files do not count towards this limit.
    pub max_files: usize,
    /// Maximum directory recursion depth, with the skill root at depth 0 (default: 20).
    pub max_depth: usize,
}

impl Default for ScanLimits {
    /// Returns the default limits: 1 MiB per file, 1000 files, recursion depth 20.
    fn default() -> Self {
        Self {
            max_file_size: 1024 * 1024, // 1 MiB
            max_files: 1000,
            max_depth: 20,
        }
    }
}
37+
/// Progress information emitted during a scan.
///
/// One value is passed to the progress callback for every file the scan loop
/// visits, whether the file is read or skipped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanProgress {
    /// File this update refers to, relative to the skill directory when the
    /// prefix can be stripped (otherwise the path as discovered).
    pub file: String,
    /// Number of files scanned so far, including the current one when it is
    /// being read.
    pub scanned: usize,
    /// Number of discovered files, capped at [`ScanLimits::max_files`].
    pub total: usize,
    /// Number of files skipped so far. The scan loop increments this when a
    /// file exceeds [`ScanLimits::max_file_size`].
    pub skipped: usize,
}
50+
1751/// Severity of a scan finding.
1852#[ derive( Debug , Clone , PartialEq , Eq ) ]
1953pub enum ScanSeverity {
@@ -57,6 +91,7 @@ pub struct ScanResult {
5791pub struct SkillScanner {
5892 deny_patterns : Vec < ( String , Regex , ScanSeverity , String ) > ,
5993 allowed_executables : Option < Vec < String > > ,
94+ limits : ScanLimits ,
6095}
6196
6297/// Default ClawHavoc defense rules.
@@ -315,6 +350,11 @@ fn default_rules() -> Vec<(String, String, ScanSeverity, String)> {
315350impl SkillScanner {
316351 /// Create a scanner with default ClawHavoc defense rules.
317352 pub fn new ( ) -> Self {
353+ Self :: with_limits ( ScanLimits :: default ( ) )
354+ }
355+
356+ /// Create a scanner with default rules and custom resource limits.
357+ pub fn with_limits ( limits : ScanLimits ) -> Self {
318358 let compiled = default_rules ( )
319359 . into_iter ( )
320360 . filter_map ( |( name, pattern, severity, msg) | {
@@ -327,6 +367,7 @@ impl SkillScanner {
327367 Self {
328368 deny_patterns : compiled,
329369 allowed_executables : None ,
370+ limits,
330371 }
331372 }
332373
@@ -425,16 +466,78 @@ impl SkillScanner {
425466
426467 /// Scan all files in a skill directory.
427468 pub fn scan_skill ( & self , skill_dir : & Path ) -> ScanResult {
469+ self . scan_skill_with_progress ( skill_dir, |_| { } )
470+ }
471+
472+ /// Scan all files in a skill directory, reporting progress via callback.
473+ pub fn scan_skill_with_progress < F > ( & self , skill_dir : & Path , on_progress : F ) -> ScanResult
474+ where
475+ F : Fn ( & ScanProgress ) ,
476+ {
428477 let mut all_findings = Vec :: new ( ) ;
429478
430- if let Ok ( entries) = walk_dir_sorted ( skill_dir) {
479+ if let Ok ( entries) = walk_dir_sorted ( skill_dir, self . limits . max_depth ) {
480+ let total = entries. len ( ) . min ( self . limits . max_files ) ;
481+ let mut scanned = 0usize ;
482+ let mut skipped = 0usize ;
483+
431484 for entry_path in entries {
485+ if scanned >= self . limits . max_files {
486+ all_findings. push ( ScanFinding {
487+ rule : "scan-limit:max-files" . into ( ) ,
488+ severity : ScanSeverity :: Warning ,
489+ message : format ! (
490+ "Scan stopped after {} files (limit reached)" ,
491+ self . limits. max_files
492+ ) ,
493+ line : None ,
494+ file : skill_dir. display ( ) . to_string ( ) ,
495+ } ) ;
496+ break ;
497+ }
498+
499+ let relative = entry_path
500+ . strip_prefix ( skill_dir)
501+ . unwrap_or ( & entry_path)
502+ . to_string_lossy ( )
503+ . to_string ( ) ;
504+
505+ // Check file size before reading
506+ let file_size = entry_path
507+ . metadata ( )
508+ . map ( |m| m. len ( ) )
509+ . unwrap_or ( 0 ) ;
510+
511+ if file_size > self . limits . max_file_size {
512+ skipped += 1 ;
513+ all_findings. push ( ScanFinding {
514+ rule : "scan-limit:file-size" . into ( ) ,
515+ severity : ScanSeverity :: Info ,
516+ message : format ! (
517+ "Skipped: file size {} bytes exceeds limit of {} bytes" ,
518+ file_size, self . limits. max_file_size
519+ ) ,
520+ line : None ,
521+ file : relative. clone ( ) ,
522+ } ) ;
523+ on_progress ( & ScanProgress {
524+ file : relative,
525+ scanned,
526+ total,
527+ skipped,
528+ } ) ;
529+ continue ;
530+ }
531+
532+ scanned += 1 ;
533+ on_progress ( & ScanProgress {
534+ file : relative. clone ( ) ,
535+ scanned,
536+ total,
537+ skipped,
538+ } ) ;
539+
432540 if let Ok ( content) = std:: fs:: read_to_string ( & entry_path) {
433- let relative = entry_path
434- . strip_prefix ( skill_dir)
435- . unwrap_or ( & entry_path)
436- . to_string_lossy ( )
437- . to_string ( ) ;
438541 let findings = self . scan_content ( & content, & relative) ;
439542 all_findings. extend ( findings) ;
440543 }
@@ -459,22 +562,27 @@ impl Default for SkillScanner {
459562}
460563
461564/// Recursively walk a directory and return sorted file paths.
462- fn walk_dir_sorted ( dir : & Path ) -> std:: io:: Result < Vec < std:: path:: PathBuf > > {
565+ fn walk_dir_sorted ( dir : & Path , max_depth : usize ) -> std:: io:: Result < Vec < std:: path:: PathBuf > > {
463566 let mut files = Vec :: new ( ) ;
464- walk_dir_recursive ( dir, & mut files) ?;
567+ walk_dir_recursive ( dir, & mut files, 0 , max_depth ) ?;
465568 files. sort ( ) ;
466569 Ok ( files)
467570}
468571
469- fn walk_dir_recursive ( dir : & Path , files : & mut Vec < std:: path:: PathBuf > ) -> std:: io:: Result < ( ) > {
470- if !dir. is_dir ( ) {
572+ fn walk_dir_recursive (
573+ dir : & Path ,
574+ files : & mut Vec < std:: path:: PathBuf > ,
575+ depth : usize ,
576+ max_depth : usize ,
577+ ) -> std:: io:: Result < ( ) > {
578+ if !dir. is_dir ( ) || depth > max_depth {
471579 return Ok ( ( ) ) ;
472580 }
473581 for entry in std:: fs:: read_dir ( dir) ? {
474582 let entry = entry?;
475583 let path = entry. path ( ) ;
476584 if path. is_dir ( ) {
477- walk_dir_recursive ( & path, files) ?;
585+ walk_dir_recursive ( & path, files, depth + 1 , max_depth ) ?;
478586 } else if path. is_file ( ) {
479587 // Skip binary files and signature files
480588 let ext = path. extension ( ) . and_then ( |e| e. to_str ( ) ) . unwrap_or ( "" ) ;
0 commit comments