@@ -204,8 +204,14 @@ let processFilesParallel ~config ~numDomains (cmtFilePaths : string list) :
204204
205205(* * Process all cmt files and return results for DCE and Exception analysis.
206206 Conceptually: map process_cmt_file over all files. *)
207- let processCmtFiles ~config ~cmtRoot ~reactive_collection : all_files_result =
208- let cmtFilePaths = collectCmtFilePaths ~cmt Root in
207+ let processCmtFiles ~config ~cmtRoot ~reactive_collection ~skip_file :
208+ all_files_result =
209+ let cmtFilePaths =
210+ let all = collectCmtFilePaths ~cmt Root in
211+ match skip_file with
212+ | Some should_skip -> List. filter (fun p -> not (should_skip p)) all
213+ | None -> all
214+ in
209215 (* Reactive mode: use incremental processing that skips unchanged files *)
210216 match reactive_collection with
211217 | Some collection ->
@@ -245,10 +251,10 @@ let shuffle_list lst =
245251 Array. to_list arr
246252
247253let runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge
248- ~reactive_liveness ~reactive_solver =
254+ ~reactive_liveness ~reactive_solver ~ skip_file =
249255 (* Map: process each file -> list of file_data *)
250256 let {dce_data_list; exception_results} =
251- processCmtFiles ~config: dce_config ~cmt Root ~reactive_collection
257+ processCmtFiles ~config: dce_config ~cmt Root ~reactive_collection ~skip_file
252258 in
253259 (* Get exception results from reactive collection if available *)
254260 let exception_results =
@@ -522,20 +528,141 @@ let runAnalysisAndReport ~cmtRoot =
522528 ~config: dce_config)
523529 | _ -> None
524530 in
531+ (* Collect CMT file paths once for churning *)
532+ let cmtFilePaths =
533+ if ! Cli. churn > 0 then Some (collectCmtFilePaths ~cmt Root) else None
534+ in
535+ (* Track previous issue count for diff reporting *)
536+ let prev_issue_count = ref 0 in
537+ (* Track currently removed files (to add them back on next run) *)
538+ let removed_files = ref [] in
539+ (* Set of removed files for filtering in processCmtFiles *)
540+ let removed_set = Hashtbl. create 64 in
541+ (* Aggregate stats for churn mode *)
542+ let churn_times = ref [] in
543+ let issues_added_list = ref [] in
544+ let issues_removed_list = ref [] in
525545 for run = 1 to numRuns do
526546 Timing. reset () ;
527547 (* Clear stats at start of each run to avoid accumulation *)
528548 if run > 1 then Log_.Stats. clear () ;
549+ (* Print run header first *)
529550 if numRuns > 1 && ! Cli. timing then
530551 Printf. eprintf " \n === Run %d/%d ===\n %!" run numRuns;
552+ (* Churn: alternate between remove and add phases *)
553+ (if ! Cli. churn > 0 then
554+ match (reactive_collection, cmtFilePaths) with
555+ | Some collection , Some paths ->
556+ Reactive. reset_stats () ;
557+ if run > 1 && ! removed_files <> [] then (
558+ (* Add back previously removed files *)
559+ let to_add = ! removed_files in
560+ removed_files := [] ;
561+ (* Clear removed set so these files get processed again *)
562+ List. iter (fun p -> Hashtbl. remove removed_set p) to_add;
563+ let t0 = Unix. gettimeofday () in
564+ let processed =
565+ ReactiveFileCollection. process_files_batch
566+ (collection
567+ : ReactiveAnalysis. t
568+ :> (_, _) ReactiveFileCollection. t)
569+ to_add
570+ in
571+ let elapsed = Unix. gettimeofday () -. t0 in
572+ Timing. add_churn_time elapsed;
573+ churn_times := elapsed :: ! churn_times;
574+ if ! Cli. timing then (
575+ Printf. eprintf " Added back %d files (%.3fs)\n %!" processed
576+ elapsed;
577+ (match reactive_liveness with
578+ | Some liveness -> ReactiveLiveness. print_stats ~t: liveness
579+ | None -> () );
580+ match reactive_solver with
581+ | Some solver -> ReactiveSolver. print_stats ~t: solver
582+ | None -> () ))
583+ else if run > 1 then (
584+ (* Remove new random files *)
585+ let numChurn = min ! Cli. churn (List. length paths) in
586+ let shuffled = shuffle_list paths in
587+ let to_remove = List. filteri (fun i _ -> i < numChurn) shuffled in
588+ removed_files := to_remove;
589+ (* Mark as removed so processCmtFiles skips them *)
590+ List. iter (fun p -> Hashtbl. replace removed_set p () ) to_remove;
591+ let t0 = Unix. gettimeofday () in
592+ let removed =
593+ ReactiveFileCollection. remove_batch
594+ (collection
595+ : ReactiveAnalysis. t
596+ :> (_, _) ReactiveFileCollection. t)
597+ to_remove
598+ in
599+ let elapsed = Unix. gettimeofday () -. t0 in
600+ Timing. add_churn_time elapsed;
601+ churn_times := elapsed :: ! churn_times;
602+ if ! Cli. timing then (
603+ Printf. eprintf " Removed %d files (%.3fs)\n %!" removed elapsed;
604+ (match reactive_liveness with
605+ | Some liveness -> ReactiveLiveness. print_stats ~t: liveness
606+ | None -> () );
607+ match reactive_solver with
608+ | Some solver -> ReactiveSolver. print_stats ~t: solver
609+ | None -> () ))
610+ | _ -> () );
611+ (* Skip removed files in reactive mode *)
612+ let skip_file =
613+ if Hashtbl. length removed_set > 0 then
614+ Some (fun path -> Hashtbl. mem removed_set path)
615+ else None
616+ in
531617 runAnalysis ~dce_config ~cmt Root ~reactive_collection ~reactive_merge
532- ~reactive_liveness ~reactive_solver ;
533- if run = numRuns then (
534- (* Only report on last run *)
618+ ~reactive_liveness ~reactive_solver ~skip_file ;
619+ (* Report issue count with diff *)
620+ let current_count = Log_.Stats. get_issue_count () in
621+ if ! Cli. churn > 0 then (
622+ let diff = current_count - ! prev_issue_count in
623+ (* Track added/removed separately *)
624+ if run > 1 then
625+ if diff > 0 then
626+ issues_added_list := float_of_int diff :: ! issues_added_list
627+ else if diff < 0 then
628+ issues_removed_list := float_of_int (- diff) :: ! issues_removed_list;
629+ let diff_str =
630+ if run = 1 then " "
631+ else if diff > = 0 then Printf. sprintf " (+%d)" diff
632+ else Printf. sprintf " (%d)" diff
633+ in
535634 Log_.Stats. report ~config: dce_config;
536- Log_.Stats. clear () );
635+ if ! Cli. timing then
636+ Printf. eprintf " Total issues: %d%s\n %!" current_count diff_str;
637+ prev_issue_count := current_count)
638+ else if run = numRuns then
639+ (* Only report on last run for non-churn mode *)
640+ Log_.Stats. report ~config: dce_config;
641+ Log_.Stats. clear () ;
537642 Timing. report ()
538643 done ;
644+ (* Print aggregate churn stats *)
645+ if ! Cli. churn > 0 && ! Cli. timing && List. length ! churn_times > 0 then (
646+ let calc_stats lst =
647+ if lst = [] then (0.0 , 0.0 )
648+ else
649+ let n = float_of_int (List. length lst) in
650+ let sum = List. fold_left ( +. ) 0.0 lst in
651+ let mean = sum /. n in
652+ let variance =
653+ List. fold_left (fun acc x -> acc +. ((x -. mean) ** 2.0 )) 0.0 lst /. n
654+ in
655+ (mean, sqrt variance)
656+ in
657+ let time_mean, time_std = calc_stats ! churn_times in
658+ let added_mean, added_std = calc_stats ! issues_added_list in
659+ let removed_mean, removed_std = calc_stats ! issues_removed_list in
660+ Printf. eprintf " \n === Churn Summary ===\n " ;
661+ Printf. eprintf " Churn operations: %d\n " (List. length ! churn_times);
662+ Printf. eprintf " Churn time: mean=%.3fs std=%.3fs\n " time_mean time_std;
663+ Printf. eprintf " Issues added: mean=%.0f std=%.0f\n " added_mean added_std;
664+ Printf. eprintf " Issues removed: mean=%.0f std=%.0f\n " removed_mean
665+ removed_std);
539666 if ! Cli. json then EmitJson. finish ()
540667
541668let cli () =
@@ -657,6 +784,10 @@ let cli () =
657784 ( " -runs" ,
658785 Int (fun n -> Cli. runs := n),
659786 " n Run analysis n times (for benchmarking cache effectiveness)" );
787+ ( " -churn" ,
788+ Int (fun n -> Cli. churn := n),
789+ " n Remove and re-add n random files between runs (tests incremental \
790+ correctness)" );
660791 (" -version" , Unit versionAndExit, " Show version information and exit" );
661792 (" --version" , Unit versionAndExit, " Show version information and exit" );
662793 ]
0 commit comments