@@ -549,8 +549,6 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = {
549549};
550550
551551
552- #define CONFIDENCE_RANGE 1000
553- #define CONFIDENCE_CUTOFF 333
554552
555553#ifdef Py_DEBUG
556554#define DPRINTF (level , ...) \
@@ -598,6 +596,46 @@ add_to_trace(
598596 ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
599597
600598
599+ /* Compute branch fitness penalty based on how likely the traced path is.
600+ * The penalty is small when the traced path is common, large when rare.
601+ * A branch that historically goes the other way gets a heavy penalty. */
602+ static inline int
603+ compute_branch_penalty (uint16_t history , bool branch_taken )
604+ {
605+ int taken_count = _Py_popcount32 ((uint32_t )history );
606+ int on_trace_count = branch_taken ? taken_count : 16 - taken_count ;
607+ int off_trace = 16 - on_trace_count ;
608+ /* Linear scaling: off_trace ranges from 0 (fully biased our way)
609+ * to 16 (fully biased against us), so the penalty ranges from
610+ * FITNESS_BRANCH_BASE to FITNESS_BRANCH_BASE + 32. */
611+ return FITNESS_BRANCH_BASE + off_trace * 2 ;
612+ }
613+
614+ /* Compute exit quality for the current trace position.
615+ * Higher values mean better places to stop the trace. */
616+ static inline int32_t
617+ compute_exit_quality (_Py_CODEUNIT * target_instr , int opcode ,
618+ const _PyJitTracerState * tracer )
619+ {
620+ if (target_instr == tracer -> initial_state .start_instr ||
621+ target_instr == tracer -> initial_state .close_loop_instr ) {
622+ return EXIT_QUALITY_CLOSE_LOOP ;
623+ }
624+ if (target_instr -> op .code == ENTER_EXECUTOR ) {
625+ return EXIT_QUALITY_ENTER_EXECUTOR ;
626+ }
627+ if (_PyOpcode_Caches [_PyOpcode_Deopt [opcode ]] > 0 ) {
628+ return EXIT_QUALITY_SPECIALIZABLE ;
629+ }
630+ return EXIT_QUALITY_DEFAULT ;
631+ }
632+
633+ static inline int32_t
634+ compute_frame_penalty (const _PyOptimizationConfig * cfg )
635+ {
636+ return (int32_t )cfg -> fitness_initial / 10 + 1 ;
637+ }
638+
601639static int
602640is_terminator (const _PyUOpInstruction * uop )
603641{
@@ -637,6 +675,7 @@ _PyJit_translate_single_bytecode_to_trace(
637675 _Py_CODEUNIT * this_instr = tracer -> prev_state .instr ;
638676 _Py_CODEUNIT * target_instr = this_instr ;
639677 uint32_t target = 0 ;
678+ int end_trace_opcode = _DEOPT ;
640679
641680 target = Py_IsNone ((PyObject * )old_code )
642681 ? (uint32_t )(target_instr - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR )
@@ -734,16 +773,14 @@ _PyJit_translate_single_bytecode_to_trace(
734773 DPRINTF (2 , "Unsupported: oparg too large\n" );
735774 unsupported :
736775 {
737- // Rewind to previous instruction and replace with _EXIT_TRACE.
738776 _PyUOpInstruction * curr = uop_buffer_last (trace );
739777 while (curr -> opcode != _SET_IP && uop_buffer_length (trace ) > 2 ) {
740778 trace -> next -- ;
741779 curr = uop_buffer_last (trace );
742780 }
743- assert (curr -> opcode == _SET_IP || uop_buffer_length (trace ) == 2 );
744781 if (curr -> opcode == _SET_IP ) {
745782 int32_t old_target = (int32_t )uop_get_target (curr );
746- curr -> opcode = _DEOPT ;
783+ curr -> opcode = end_trace_opcode ;
747784 curr -> format = UOP_FORMAT_TARGET ;
748785 curr -> target = old_target ;
749786 }
@@ -763,6 +800,23 @@ _PyJit_translate_single_bytecode_to_trace(
763800 return 1 ;
764801 }
765802
803+ // Fitness-based trace quality check (before reserving space for this instruction)
804+ _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
805+ int32_t eq = compute_exit_quality (target_instr , opcode , tracer );
806+ DPRINTF (3 , "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n" ,
807+ _PyOpcode_OpName [opcode ], oparg , ts -> fitness , eq , ts -> frame_depth );
808+
809+ // Check if fitness is depleted — should we stop the trace?
810+ if (ts -> fitness < eq ) {
811+ // This is a tracer heuristic rather than normal program control flow,
812+ // so leave operand1 clear and let the resulting side exit increase chain_depth.
813+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
814+ OPT_STAT_INC (fitness_terminated_traces );
815+ DPRINTF (2 , "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n" ,
816+ _PyOpcode_OpName [opcode ], oparg , ts -> fitness , eq );
817+ goto done ;
818+ }
819+
766820 // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
767821 trace -> end -= 2 ;
768822
@@ -816,13 +870,22 @@ _PyJit_translate_single_bytecode_to_trace(
816870 assert (jump_happened ? (next_instr == computed_jump_instr ) : (next_instr == computed_next_instr ));
817871 uint32_t uopcode = BRANCH_TO_GUARD [opcode - POP_JUMP_IF_FALSE ][jump_happened ];
818872 ADD_TO_TRACE (uopcode , 0 , 0 , INSTR_IP (jump_happened ? computed_next_instr : computed_jump_instr , old_code ));
873+ int bp = compute_branch_penalty (target_instr [1 ].cache , jump_happened );
874+ tracer -> translator_state .fitness -= bp ;
875+ DPRINTF (3 , " branch penalty: -%d (history=0x%04x, taken=%d) -> fitness=%d\n" ,
876+ bp , target_instr [1 ].cache , jump_happened ,
877+ tracer -> translator_state .fitness );
878+
819879 break ;
820880 }
821881 case JUMP_BACKWARD_JIT :
822882 // This is possible as the JIT might have re-activated after it was disabled
823883 case JUMP_BACKWARD_NO_JIT :
824884 case JUMP_BACKWARD :
825885 ADD_TO_TRACE (_CHECK_PERIODIC , 0 , 0 , target );
886+ tracer -> translator_state .fitness -= FITNESS_BACKWARD_EDGE ;
887+ DPRINTF (3 , " backward edge penalty: -%d -> fitness=%d\n" ,
888+ FITNESS_BACKWARD_EDGE , tracer -> translator_state .fitness );
826889 _Py_FALLTHROUGH ;
827890 case JUMP_BACKWARD_NO_INTERRUPT :
828891 {
@@ -945,6 +1008,44 @@ _PyJit_translate_single_bytecode_to_trace(
9451008 assert (next -> op .code == STORE_FAST );
9461009 operand = next -> op .arg ;
9471010 }
1011+ else if (uop == _PUSH_FRAME ) {
1012+ _PyJitTracerTranslatorState * ts_depth = & tracer -> translator_state ;
1013+ ts_depth -> frame_depth ++ ;
1014+ if (ts_depth -> frame_depth >= MAX_ABSTRACT_FRAME_DEPTH ) {
1015+ // The optimizer can't handle frames this deep,
1016+ // so there's no point continuing the trace.
1017+ DPRINTF (2 , "Unsupported: frame depth %d >= MAX_ABSTRACT_FRAME_DEPTH\n" ,
1018+ ts_depth -> frame_depth );
1019+ end_trace_opcode = _EXIT_TRACE ;
1020+ goto unsupported ;
1021+ }
1022+ int32_t frame_penalty = compute_frame_penalty (& tstate -> interp -> opt_config );
1023+ int32_t cost = frame_penalty * ts_depth -> frame_depth ;
1024+ ts_depth -> fitness -= cost ;
1025+ DPRINTF (3 , " _PUSH_FRAME: depth=%d, penalty=-%d (per_frame=%d) -> fitness=%d\n" ,
1026+ ts_depth -> frame_depth , cost , frame_penalty ,
1027+ ts_depth -> fitness );
1028+ }
1029+ else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE ) {
1030+ _PyJitTracerTranslatorState * ts_depth = & tracer -> translator_state ;
1031+ int32_t frame_penalty = compute_frame_penalty (& tstate -> interp -> opt_config );
1032+ if (ts_depth -> frame_depth <= 0 ) {
1033+ // Underflow: returning from a frame we didn't enter
1034+ ts_depth -> fitness -= frame_penalty * 2 ;
1035+ DPRINTF (3 , " %s: underflow penalty=-%d -> fitness=%d\n" ,
1036+ _PyOpcode_uop_name [uop ], frame_penalty * 2 ,
1037+ ts_depth -> fitness );
1038+ }
1039+ else {
1040+ // Reward returning: small inlined calls should be encouraged
1041+ ts_depth -> fitness += frame_penalty ;
1042+ DPRINTF (3 , " %s: return reward=+%d, depth=%d -> fitness=%d\n" ,
1043+ _PyOpcode_uop_name [uop ], frame_penalty ,
1044+ ts_depth -> frame_depth - 1 ,
1045+ ts_depth -> fitness );
1046+ }
1047+ ts_depth -> frame_depth = ts_depth -> frame_depth <= 0 ? 0 : ts_depth -> frame_depth - 1 ;
1048+ }
9481049 else if (_PyUop_Flags [uop ] & HAS_RECORDS_VALUE_FLAG ) {
9491050 PyObject * recorded_value = tracer -> prev_state .recorded_value ;
9501051 tracer -> prev_state .recorded_value = NULL ;
@@ -986,7 +1087,13 @@ _PyJit_translate_single_bytecode_to_trace(
9861087 ADD_TO_TRACE (_JUMP_TO_TOP , 0 , 0 , 0 );
9871088 goto done ;
9881089 }
989- DPRINTF (2 , "Trace continuing\n" );
1090+ // Update fitness AFTER translation, BEFORE returning to continue tracing.
1091+ // This ensures the next iteration's fitness check reflects the cost of
1092+ // all instructions translated so far.
1093+ tracer -> translator_state .fitness -= FITNESS_PER_INSTRUCTION ;
1094+ DPRINTF (3 , " per-insn cost: -%d -> fitness=%d\n" ,
1095+ FITNESS_PER_INSTRUCTION , tracer -> translator_state .fitness );
1096+ DPRINTF (2 , "Trace continuing (fitness=%d)\n" , tracer -> translator_state .fitness );
9901097 return 1 ;
9911098done :
9921099 DPRINTF (2 , "Trace done\n" );
@@ -1069,6 +1176,17 @@ _PyJit_TryInitializeTracing(
10691176 assert (curr_instr -> op .code == JUMP_BACKWARD_JIT || curr_instr -> op .code == RESUME_CHECK_JIT || (exit != NULL ));
10701177 tracer -> initial_state .jump_backward_instr = curr_instr ;
10711178
1179+ // Initialize fitness tracking state
1180+ const _PyOptimizationConfig * cfg = & tstate -> interp -> opt_config ;
1181+ _PyJitTracerTranslatorState * ts = & tracer -> translator_state ;
1182+ bool is_side_trace = (exit != NULL );
1183+ ts -> fitness = is_side_trace
1184+ ? (int32_t )cfg -> fitness_initial_side
1185+ : (int32_t )cfg -> fitness_initial ;
1186+ ts -> frame_depth = 0 ;
1187+ DPRINTF (3 , "Fitness init: %s trace, fitness=%d\n" ,
1188+ is_side_trace ? "side" : "root" , ts -> fitness );
1189+
10721190 tracer -> is_tracing = true;
10731191 return 1 ;
10741192}
0 commit comments