1+ """
2+ PuzzleKit Benchmark Tool
3+
4+ Updated for the unified `<puzzle>_dataset.json` format.
5+ Each puzzle folder now provides a single dataset file holding both problems and solutions; all original benchmarking logic is preserved.
6+ """
7+
18import os
29import sys
310import json
@@ -100,23 +107,24 @@ def run_single_benchmark(puzzle_type: str, pid: str, problem_str: str, solution_
100107 record ["status" ] = result_data .get ("status" , "Unknown" )
101108 record ["total_time" ] = toc - tic
102109
103- # Verification
104- if record ["status" ] in ["Optimal" , "Feasible" ] and solution_str :
110+ # Verification (only if solution exists)
111+ if record ["status" ] in ["Optimal" , "Feasible" ] and solution_str . strip () :
105112 res_grid = result_data .get ("solution_grid" , [])
106113 sol_grid = parse_simple_solution_string (solution_str )
107114 try :
108115 is_correct = grid_verifier (puzzle_type , res_grid , sol_grid )
109116 record ["is_correct" ] = str (is_correct )
110117 except Exception as ve :
111118 record ["is_correct" ] = "Error"
119+ record ["error_msg" ] = f"Verification error: { ve } "
112120 except Exception as e :
113121 record ["status" ] = "Error"
114122 record ["error_msg" ] = str (e )
115123
116124 return record
117125
118126def parse_args ():
119- parser = argparse .ArgumentParser (description = "PuzzleKit Benchmark Tool." )
127+ parser = argparse .ArgumentParser (description = "PuzzleKit Benchmark Tool (supports _dataset.json format) ." )
120128
121129 # either all or one puzzle, mutually exclusive
122130 group = parser .add_mutually_exclusive_group ()
@@ -132,13 +140,10 @@ def main():
132140 tic = time .perf_counter ()
133141 args = parse_args ()
134142
135- # if no parameter is specified, print help information and exit, or default to previous behavior (currently set to default all, explicit usage of --all is required)
136- # but for convenience, if no parameter is specified, default to running all (compatibility with original script behavior)
137- # here I set it to: if no parameter is specified, default to running all (compatibility with original script behavior)
143+ # Default behavior: run all if no arguments specified
138144 target_puzzle = args .puzzle
139- run_all = args .all
145+ run_all = args .all or ( not target_puzzle and not run_all )
140146
141- # default behavior: if no parameter is specified, default to running all (compatibility with original script behavior)
142147 if not target_puzzle and not run_all :
143148 run_all = True
144149
@@ -155,9 +160,7 @@ def main():
155160 sorted_assets = sorted (asset_folders )
156161
157162 if target_puzzle :
158- # Normalize input to lower case for comparison
159163 target_lower = target_puzzle .lower ()
160- # Filter matching folders
161164 filtered_assets = [f for f in sorted_assets if f .lower () == target_lower ]
162165
163166 if not filtered_assets :
@@ -172,7 +175,7 @@ def main():
172175 # --- Stats Containers ---
173176 table_rows = []
174177 total_problems_global = 0
175- total_solutions_global = 0
178+ total_solutions_global = 0 # Now equals total_problems_global (all puzzles have solution slots)
176179
177180 csv_headers = ["puzzle_type" , "pid" , "status" , "is_correct" , "total_time" , "error_msg" ]
178181 csv_file = open (OUTPUT_CSV , 'w' , newline = '' , encoding = 'utf-8' )
@@ -181,78 +184,90 @@ def main():
181184
182185 print (f"Results will be saved to: { OUTPUT_CSV } " )
183186
184- # Iterate
187+ # Iterate over puzzle types
185188 for idx , folder_name in enumerate (sorted_assets , 1 ):
189+ # Match solver class name
186190 puzzle_type = None
187- # Heuristic matching
188191 for pt in all_puzzle_types :
189192 if infer_class_name (pt ) == folder_name :
190193 puzzle_type = pt
191194 break
192195
193- prob_path = os .path .join (ASSETS_DIR , folder_name , "problems" , f"{ folder_name } _puzzles.json" )
194- sol_path = os .path .join (ASSETS_DIR , folder_name , "solutions" , f"{ folder_name } _solutions.json" )
195-
196- prob_data = load_json_file (prob_path )
197- sol_data = load_json_file (sol_path )
196+ # === KEY CHANGE: Load unified _dataset.json instead of separate files ===
197+ dataset_path = os .path .join (ASSETS_DIR , folder_name , f"{ folder_name } _dataset.json" )
198+
199+ if not os .path .exists (dataset_path ):
200+ print (f" ⚠️ Skipping { folder_name } : _dataset.json not found at { dataset_path } " )
201+ continue
202+
203+ dataset_data = load_json_file (dataset_path )
204+ puzzles_dict = dataset_data .get ("data" , {})
198205
199- puzzles = prob_data .get ("puzzles" , {})
200- solutions_map = sol_data .get ("solutions" , {})
206+ # Get counts from dataset metadata (fallback to dict length if missing)
207+ num_pbl = dataset_data .get ("count" , len (puzzles_dict ))
208+ num_sol = dataset_data .get ("count_sol" , len (puzzles_dict )) # In new format, all puzzles have solution slots (may be empty strings)
201209
202- num_pbl = len (puzzles )
203- num_sol = len (solutions_map )
204- max_size = get_max_size_str (puzzles )
210+ # Calculate max size from problem data
211+ max_size = get_max_size_str (puzzles_dict )
205212
206213 total_problems_global += num_pbl
207- total_solutions_global += num_sol
214+ total_solutions_global += num_sol # Same as num_pbl in new format
208215
216+ # Check solver availability
209217 solver_status = "❌"
210- avg_time = "-"
211- max_time = "-"
212- correct_cnt = "-"
213-
214218 has_solver_impl = False
215- try :
216- if puzzle_type :
219+ if puzzle_type :
220+ try :
217221 get_solver_class (puzzle_type )
218222 has_solver_impl = True
219223 solver_status = "✅"
220- except ValueError :
221- pass
224+ except ( ValueError , AttributeError ) :
225+ pass
222226
227+ # Run benchmarks if solver exists and data available
223228 if has_solver_impl and num_pbl > 0 :
224229 print (f"[{ idx } /{ len (sorted_assets )} ] Benchmarking { folder_name } ({ num_pbl } instances)..." )
225230
226231 times = []
227232 corrects = 0
228233
229- for pid , p_data in puzzles .items ():
230- # Loop through instances
234+ for pid , p_data in puzzles_dict .items ():
231235 problem_str = p_data .get ("problem" , "" )
232- solution_str = solutions_map .get (pid , {}).get ("solution" , "" )
233-
236+ solution_str = p_data .get ("solution" , "" ) # May be empty string
237+
238+ # Run benchmark for this instance
234239 res = run_single_benchmark (puzzle_type , pid , problem_str , solution_str )
235240 writer .writerow (res )
236241
237- if res ['status' ] != "Error" :
242+ # Collect timing stats for non-error runs
243+ if res ['status' ] not in ["Error" , "NotStarted" ]:
238244 times .append (res ['total_time' ])
239245
246+ # Count correct solutions (only when verification was performed)
240247 if res ['is_correct' ] == 'True' :
241248 corrects += 1
242249
250+ # Calculate timing statistics
243251 if times :
244252 avg_time = f"{ statistics .mean (times ):.3f} "
245253 max_time = f"{ max (times ):.3f} "
254+ else :
255+ avg_time = "-"
256+ max_time = "-"
246257 correct_cnt = str (corrects )
247258 else :
248- print (f"[{ idx } /{ len (sorted_assets )} ] Skipping { folder_name } (No solver or no data)" )
259+ print (f"[{ idx } /{ len (sorted_assets )} ] Skipping { folder_name } (No solver implementation or no data)" )
260+ avg_time = "-"
261+ max_time = "-"
262+ correct_cnt = "-"
249263
264+ # Generate markdown table row
250265 folder_link = f"[{ folder_name } ](./assets/data/{ folder_name } )"
251266 table_rows .append ([
252267 str (idx ),
253268 folder_link ,
254269 str (num_pbl ),
255- str (num_sol ),
270+ str (num_sol ), # Now equals num_pbl (all puzzles have solution slots)
256271 max_size ,
257272 solver_status ,
258273 avg_time ,
@@ -262,7 +277,7 @@ def main():
262277
263278 csv_file .close ()
264279
265- # --- Generate Markdown ---
280+ # --- Generate Markdown Report ---
266281 print ("\n " + "=" * 50 )
267282 print ("GENERATING MARKDOWN REPORT" )
268283 print ("=" * 50 + "\n " )
@@ -300,6 +315,7 @@ def main():
300315 print (f"\n Markdown saved to: { md_path } " )
301316 print (f"Full CSV data saved to: { OUTPUT_CSV } " )
302317 toc = time .perf_counter ()
303- print (f"Time taken: { toc - tic :.3f} seconds" )
318+ print (f"Total benchmark time: { toc - tic :.3f} seconds" )
319+
304320if __name__ == "__main__" :
305321 main ()
0 commit comments