Skip to content

Commit f329544

Browse files
committed
Add churn mode for incremental correctness testing
- Add `-churn n` CLI option to simulate file add/remove cycles
- Alternates between removing n random files and adding them back
- Shows issue count changes per run with detailed diff
- Add `remove_batch` to `ReactiveFileCollection` for batched removals (28x faster)
- Add `reset_stats` to `Reactive` for per-operation stat tracking
- Include churn time in timing report with proper accounting
- Print aggregate stats at end: mean/std for churn time and issue changes
- Add `skip_file` parameter to filter removed files from processing
1 parent dfcb63e commit f329544

File tree

9 files changed

+191
-10
lines changed

9 files changed

+191
-10
lines changed

analysis/reactive/src/Reactive.ml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,21 @@ module Registry = struct
144144
Hashtbl.clear combinators;
145145
dirty_nodes := []
146146

147+
(** Zero every statistics field of every entry in [nodes].
    Only the [stats] record of each entry is mutated; the table itself is
    not modified, so registered nodes stay in place. *)
let reset_stats () =
  Hashtbl.iter
    (fun _key info ->
      let s = info.stats in
      (* Input-side counters. *)
      s.deltas_received <- 0;
      s.entries_received <- 0;
      s.adds_received <- 0;
      s.removes_received <- 0;
      (* Processing counters; the time field is an int64, hence [0L]. *)
      s.process_count <- 0;
      s.process_time_ns <- 0L;
      (* Output-side counters. *)
      s.deltas_emitted <- 0;
      s.entries_emitted <- 0;
      s.adds_emitted <- 0;
      s.removes_emitted <- 0)
    nodes
161+
147162
(** Generate Mermaid diagram of the pipeline *)
148163
let to_mermaid () =
149164
let buf = Buffer.create 256 in
@@ -1167,3 +1182,4 @@ let fixpoint ~name ~(init : ('k, unit) t) ~(edges : ('k, 'k list) t) () :
11671182
let to_mermaid () = Registry.to_mermaid ()
11681183
let print_stats () = Registry.print_stats ()
11691184
let reset () = Registry.clear ()
1185+
let reset_stats () = Registry.reset_stats ()

analysis/reactive/src/Reactive.mli

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,6 @@ val print_stats : unit -> unit
164164

165165
val reset : unit -> unit
166166
(** Clear all registered nodes (for tests) *)
167+
168+
val reset_stats : unit -> unit
169+
(** Reset all node statistics to zero (keeps nodes intact) *)

analysis/reactive/src/ReactiveFileCollection.ml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,19 @@ let remove t path =
8282
Hashtbl.remove t.internal.cache path;
8383
emit t (Reactive.Remove path)
8484

85+
(** Remove several files from the collection in one step.

    Each path currently present in the cache is dropped from it; paths that
    are not cached are skipped. When at least one path was dropped, a single
    [Reactive.Batch] is emitted carrying [(path, None)] for every dropped
    path, in the order the paths were given. Nothing is emitted otherwise.

    @return the number of paths that were actually removed. *)
let remove_batch t paths =
  let cache = t.internal.cache in
  (* Accumulates dropped paths in reverse; the order is restored below. *)
  let drop_if_cached dropped path =
    if Hashtbl.mem cache path then (
      Hashtbl.remove cache path;
      (path, None) :: dropped)
    else dropped
  in
  match List.rev (List.fold_left drop_if_cached [] paths) with
  | [] -> 0
  | dropped ->
    emit t (Reactive.Batch dropped);
    List.length dropped
97+
8598
(** Clear all cached data *)
8699
let clear t = Hashtbl.clear t.internal.cache
87100

analysis/reactive/src/ReactiveFileCollection.mli

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ val process_if_changed : ('raw, 'v) t -> string -> bool
5454
val remove : ('raw, 'v) t -> string -> unit
5555
(** Remove a file from the collection. *)
5656

57+
val remove_batch : ('raw, 'v) t -> string list -> int
58+
(** Remove multiple files as a batch. Returns the number of files removed.
59+
More efficient than calling [remove] multiple times. *)
60+
5761
(** {1 Cache Management} *)
5862

5963
val invalidate : ('raw, 'v) t -> string -> unit

analysis/reanalyze/src/Cli.ml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,8 @@ let reactive = ref false
3434
(* number of analysis runs (for benchmarking reactive mode) *)
3535
let runs = ref 1
3636

37+
(* number of files to churn (remove/re-add) between runs for incremental testing *)
38+
let churn = ref 0
39+
3740
(* output mermaid diagram of reactive pipeline *)
3841
let mermaid = ref false

analysis/reanalyze/src/Log_.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ module Stats = struct
197197
let issues = ref []
198198
let addIssue (issue : Issue.t) = issues := issue :: !issues
199199
let clear () = issues := []
200+
(** Number of issues currently held in [issues]. *)
let get_issue_count () = !issues |> List.length
200201

201202
let getSortedIssues () =
202203
let counters2 = Hashtbl.create 1 in

analysis/reanalyze/src/Reanalyze.ml

Lines changed: 139 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,14 @@ let processFilesParallel ~config ~numDomains (cmtFilePaths : string list) :
204204

205205
(** Process all cmt files and return results for DCE and Exception analysis.
206206
Conceptually: map process_cmt_file over all files. *)
207-
let processCmtFiles ~config ~cmtRoot ~reactive_collection : all_files_result =
208-
let cmtFilePaths = collectCmtFilePaths ~cmtRoot in
207+
let processCmtFiles ~config ~cmtRoot ~reactive_collection ~skip_file :
208+
all_files_result =
209+
let cmtFilePaths =
210+
let all = collectCmtFilePaths ~cmtRoot in
211+
match skip_file with
212+
| Some should_skip -> List.filter (fun p -> not (should_skip p)) all
213+
| None -> all
214+
in
209215
(* Reactive mode: use incremental processing that skips unchanged files *)
210216
match reactive_collection with
211217
| Some collection ->
@@ -245,10 +251,10 @@ let shuffle_list lst =
245251
Array.to_list arr
246252

247253
let runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge
248-
~reactive_liveness ~reactive_solver =
254+
~reactive_liveness ~reactive_solver ~skip_file =
249255
(* Map: process each file -> list of file_data *)
250256
let {dce_data_list; exception_results} =
251-
processCmtFiles ~config:dce_config ~cmtRoot ~reactive_collection
257+
processCmtFiles ~config:dce_config ~cmtRoot ~reactive_collection ~skip_file
252258
in
253259
(* Get exception results from reactive collection if available *)
254260
let exception_results =
@@ -522,20 +528,141 @@ let runAnalysisAndReport ~cmtRoot =
522528
~config:dce_config)
523529
| _ -> None
524530
in
531+
(* Collect CMT file paths once for churning *)
532+
let cmtFilePaths =
533+
if !Cli.churn > 0 then Some (collectCmtFilePaths ~cmtRoot) else None
534+
in
535+
(* Track previous issue count for diff reporting *)
536+
let prev_issue_count = ref 0 in
537+
(* Track currently removed files (to add them back on next run) *)
538+
let removed_files = ref [] in
539+
(* Set of removed files for filtering in processCmtFiles *)
540+
let removed_set = Hashtbl.create 64 in
541+
(* Aggregate stats for churn mode *)
542+
let churn_times = ref [] in
543+
let issues_added_list = ref [] in
544+
let issues_removed_list = ref [] in
525545
for run = 1 to numRuns do
526546
Timing.reset ();
527547
(* Clear stats at start of each run to avoid accumulation *)
528548
if run > 1 then Log_.Stats.clear ();
549+
(* Print run header first *)
529550
if numRuns > 1 && !Cli.timing then
530551
Printf.eprintf "\n=== Run %d/%d ===\n%!" run numRuns;
552+
(* Churn: alternate between remove and add phases *)
553+
(if !Cli.churn > 0 then
554+
match (reactive_collection, cmtFilePaths) with
555+
| Some collection, Some paths ->
556+
Reactive.reset_stats ();
557+
if run > 1 && !removed_files <> [] then (
558+
(* Add back previously removed files *)
559+
let to_add = !removed_files in
560+
removed_files := [];
561+
(* Clear removed set so these files get processed again *)
562+
List.iter (fun p -> Hashtbl.remove removed_set p) to_add;
563+
let t0 = Unix.gettimeofday () in
564+
let processed =
565+
ReactiveFileCollection.process_files_batch
566+
(collection
567+
: ReactiveAnalysis.t
568+
:> (_, _) ReactiveFileCollection.t)
569+
to_add
570+
in
571+
let elapsed = Unix.gettimeofday () -. t0 in
572+
Timing.add_churn_time elapsed;
573+
churn_times := elapsed :: !churn_times;
574+
if !Cli.timing then (
575+
Printf.eprintf " Added back %d files (%.3fs)\n%!" processed
576+
elapsed;
577+
(match reactive_liveness with
578+
| Some liveness -> ReactiveLiveness.print_stats ~t:liveness
579+
| None -> ());
580+
match reactive_solver with
581+
| Some solver -> ReactiveSolver.print_stats ~t:solver
582+
| None -> ()))
583+
else if run > 1 then (
584+
(* Remove new random files *)
585+
let numChurn = min !Cli.churn (List.length paths) in
586+
let shuffled = shuffle_list paths in
587+
let to_remove = List.filteri (fun i _ -> i < numChurn) shuffled in
588+
removed_files := to_remove;
589+
(* Mark as removed so processCmtFiles skips them *)
590+
List.iter (fun p -> Hashtbl.replace removed_set p ()) to_remove;
591+
let t0 = Unix.gettimeofday () in
592+
let removed =
593+
ReactiveFileCollection.remove_batch
594+
(collection
595+
: ReactiveAnalysis.t
596+
:> (_, _) ReactiveFileCollection.t)
597+
to_remove
598+
in
599+
let elapsed = Unix.gettimeofday () -. t0 in
600+
Timing.add_churn_time elapsed;
601+
churn_times := elapsed :: !churn_times;
602+
if !Cli.timing then (
603+
Printf.eprintf " Removed %d files (%.3fs)\n%!" removed elapsed;
604+
(match reactive_liveness with
605+
| Some liveness -> ReactiveLiveness.print_stats ~t:liveness
606+
| None -> ());
607+
match reactive_solver with
608+
| Some solver -> ReactiveSolver.print_stats ~t:solver
609+
| None -> ()))
610+
| _ -> ());
611+
(* Skip removed files in reactive mode *)
612+
let skip_file =
613+
if Hashtbl.length removed_set > 0 then
614+
Some (fun path -> Hashtbl.mem removed_set path)
615+
else None
616+
in
531617
runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge
532-
~reactive_liveness ~reactive_solver;
533-
if run = numRuns then (
534-
(* Only report on last run *)
618+
~reactive_liveness ~reactive_solver ~skip_file;
619+
(* Report issue count with diff *)
620+
let current_count = Log_.Stats.get_issue_count () in
621+
if !Cli.churn > 0 then (
622+
let diff = current_count - !prev_issue_count in
623+
(* Track added/removed separately *)
624+
if run > 1 then
625+
if diff > 0 then
626+
issues_added_list := float_of_int diff :: !issues_added_list
627+
else if diff < 0 then
628+
issues_removed_list := float_of_int (-diff) :: !issues_removed_list;
629+
let diff_str =
630+
if run = 1 then ""
631+
else if diff >= 0 then Printf.sprintf " (+%d)" diff
632+
else Printf.sprintf " (%d)" diff
633+
in
535634
Log_.Stats.report ~config:dce_config;
536-
Log_.Stats.clear ());
635+
if !Cli.timing then
636+
Printf.eprintf " Total issues: %d%s\n%!" current_count diff_str;
637+
prev_issue_count := current_count)
638+
else if run = numRuns then
639+
(* Only report on last run for non-churn mode *)
640+
Log_.Stats.report ~config:dce_config;
641+
Log_.Stats.clear ();
537642
Timing.report ()
538643
done;
644+
(* Print aggregate churn stats *)
645+
if !Cli.churn > 0 && !Cli.timing && List.length !churn_times > 0 then (
646+
let calc_stats lst =
647+
if lst = [] then (0.0, 0.0)
648+
else
649+
let n = float_of_int (List.length lst) in
650+
let sum = List.fold_left ( +. ) 0.0 lst in
651+
let mean = sum /. n in
652+
let variance =
653+
List.fold_left (fun acc x -> acc +. ((x -. mean) ** 2.0)) 0.0 lst /. n
654+
in
655+
(mean, sqrt variance)
656+
in
657+
let time_mean, time_std = calc_stats !churn_times in
658+
let added_mean, added_std = calc_stats !issues_added_list in
659+
let removed_mean, removed_std = calc_stats !issues_removed_list in
660+
Printf.eprintf "\n=== Churn Summary ===\n";
661+
Printf.eprintf " Churn operations: %d\n" (List.length !churn_times);
662+
Printf.eprintf " Churn time: mean=%.3fs std=%.3fs\n" time_mean time_std;
663+
Printf.eprintf " Issues added: mean=%.0f std=%.0f\n" added_mean added_std;
664+
Printf.eprintf " Issues removed: mean=%.0f std=%.0f\n" removed_mean
665+
removed_std);
539666
if !Cli.json then EmitJson.finish ()
540667

541668
let cli () =
@@ -657,6 +784,10 @@ let cli () =
657784
( "-runs",
658785
Int (fun n -> Cli.runs := n),
659786
"n Run analysis n times (for benchmarking cache effectiveness)" );
787+
( "-churn",
788+
Int (fun n -> Cli.churn := n),
789+
"n Remove and re-add n random files between runs (tests incremental \
790+
correctness)" );
660791
("-version", Unit versionAndExit, "Show version information and exit");
661792
("--version", Unit versionAndExit, "Show version information and exit");
662793
]

analysis/reanalyze/src/Timing.ml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
let enabled = ref false
44

55
type phase_times = {
6+
(* Churn (file add/remove) *)
7+
mutable churn: float;
68
(* CMT processing sub-phases *)
79
mutable file_loading: float;
810
mutable result_collection: float;
@@ -15,6 +17,7 @@ type phase_times = {
1517

1618
let times =
1719
{
20+
churn = 0.0;
1821
file_loading = 0.0;
1922
result_collection = 0.0;
2023
merging = 0.0;
@@ -26,12 +29,15 @@ let times =
2629
let timing_mutex = Mutex.create ()
2730

2831
(** Set every accumulated phase time in [times] back to zero. *)
let reset () =
  let t = times in
  (* Churn (file add/remove). *)
  t.churn <- 0.0;
  (* CMT processing sub-phases. *)
  t.file_loading <- 0.0;
  t.result_collection <- 0.0;
  (* Remaining phases. *)
  t.merging <- 0.0;
  t.solving <- 0.0;
  t.reporting <- 0.0
3438

39+
(** Add [elapsed] to the running churn total stored in [times.churn]. *)
let add_churn_time elapsed =
  let total = times.churn +. elapsed in
  times.churn <- total
40+
3541
let now () = Unix.gettimeofday ()
3642

3743
let time_phase phase_name f =
@@ -56,8 +62,11 @@ let report () =
5662
if !enabled then (
5763
let cmt_total = times.file_loading in
5864
let analysis_total = times.merging +. times.solving in
59-
let total = cmt_total +. analysis_total +. times.reporting in
65+
let total = times.churn +. cmt_total +. analysis_total +. times.reporting in
6066
Printf.eprintf "\n=== Timing ===\n";
67+
if times.churn > 0.0 then
68+
Printf.eprintf " Churn: %.3fs (%.1f%%)\n" times.churn
69+
(100.0 *. times.churn /. total);
6170
Printf.eprintf " CMT processing: %.3fs (%.1f%%)\n" cmt_total
6271
(100.0 *. cmt_total /. total);
6372
(* Only show parallel-specific timing when used *)

analysis/src/DceCommand.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ let command () =
22
Reanalyze.RunConfig.dce ();
33
let dce_config = Reanalyze.DceConfig.current () in
44
Reanalyze.runAnalysis ~dce_config ~cmtRoot:None ~reactive_collection:None
5-
~reactive_merge:None ~reactive_liveness:None ~reactive_solver:None;
5+
~reactive_merge:None ~reactive_liveness:None ~reactive_solver:None
6+
~skip_file:None;
67
let issues = !Reanalyze.Log_.Stats.issues in
78
Printf.printf "issues:%d\n" (List.length issues)

0 commit comments

Comments
 (0)