Skip to content

Commit 48317fe

Browse files
authored
gh-146640: Optimize int operations by mutating uniquely-referenced operands in place (JIT only) (GH-146641)
1 parent 80ab6d9 commit 48317fe

File tree

9 files changed

+2331
-1262
lines changed

9 files changed

+2331
-1262
lines changed

Include/internal/pycore_uop_ids.h

Lines changed: 1280 additions & 1256 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 114 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_capi/test_opt.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3390,6 +3390,154 @@ def testfunc(args):
33903390
uops = get_opnames(ex)
33913391
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)
33923392

3393+
def test_int_add_inplace_unique_lhs(self):
    """A freshly-created compact int on the left of ``+`` is mutated in place."""
    def testfunc(args):
        x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            # x * y yields a uniquely-referenced compact int; the add
            # may then reuse that object instead of allocating.
            acc += x * y + z
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 10000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE", op_names)
3407+
3408+
def test_int_add_inplace_unique_rhs(self):
    """A freshly-created compact int on the right of ``+`` is mutated in place."""
    def testfunc(args):
        x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            # The unique product sits on the right-hand side of the add.
            acc += z + x * y
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 10000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", op_names)
3422+
3423+
def test_int_add_no_inplace_non_unique(self):
    """Adds whose operands are both locals must not use the LHS in-place op.

    ``x + y`` reads two locals, so neither operand is uniquely
    referenced and the regular add must be used.  The outer
    ``acc += (x + y)`` receives the unique add result as its
    right-hand operand, so that add may use the _INPLACE_RIGHT form.
    """
    def testfunc(args):
        x, y, reps = args
        acc = 0
        for _step in range(reps):
            acc += x + y
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 5000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    # The plain add of two locals stays non-mutating.
    self.assertIn("_BINARY_OP_ADD_INT", op_names)
    # The accumulating add sees a unique right-hand operand.
    self.assertIn("_BINARY_OP_ADD_INT_INPLACE_RIGHT", op_names)
    # And no LHS in-place variant may appear for the first add.
    self.assertNotIn("_BINARY_OP_ADD_INT_INPLACE", op_names)
3444+
3445+
def test_int_add_inplace_small_int_result(self):
    """Small-int results must fall back safely instead of mutating a singleton.

    When the sum lands in the small-int cache range, the in-place path
    is expected to defer to _PyCompactLong_Add; this test checks both
    the computed total and that the cached ``7`` is left intact.
    """
    def testfunc(args):
        x, y, reps = args
        acc = 0
        for _step in range(reps):
            acc += x * y + 1  # 2 * 3 = 6, + 1 = 7 — a cached small int
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2, 3, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 7)
    # The small-int singleton for 7 must not have been mutated.
    self.assertEqual(7, 3 + 4)
3459+
3460+
def test_int_subtract_inplace_unique_lhs(self):
    """A unique compact int on the left of ``-`` is reused in place."""
    def testfunc(args):
        x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            # x * y creates the unique minuend; subtracting z may reuse it.
            acc += x * y - z
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 1000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 5000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE", op_names)
3474+
3475+
def test_int_subtract_inplace_unique_rhs(self):
    """A unique compact int on the right of ``-`` is reused in place."""
    def testfunc(args):
        x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            # The unique product is the subtrahend this time.
            acc += z - x * y
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 10000, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 4000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT", op_names)
3489+
3490+
def test_int_multiply_inplace_unique_lhs(self):
    """A unique compact int on the left of ``*`` is reused in place."""
    def testfunc(args):
        x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            # (x + y) creates the unique factor; multiplying by z may reuse it.
            acc += (x + y) * z
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 20000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE", op_names)
3504+
3505+
def test_int_multiply_inplace_unique_rhs(self):
    """A unique compact int on the right of ``*`` is reused in place."""
    def testfunc(args):
        x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            # The unique sum is the right-hand factor.
            acc += z * (x + y)
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3000, 4, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 20000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT", op_names)
3519+
3520+
def test_int_inplace_chain_propagation(self):
    """Uniqueness propagates through a chain of int operations.

    In ``w * x + y * z`` both products are unique, so the add may
    mutate either side; the add's result is in turn unique for the
    following ``+=``.  Either in-place add form is acceptable.
    """
    def testfunc(args):
        w, x, y, z, reps = args
        acc = 0
        for _step in range(reps):
            acc += w * x + y * z
        return acc

    res, ex = self._run_with_optimizer(testfunc, (2000, 3, 4000, 5, TIER2_THRESHOLD))
    self.assertEqual(res, TIER2_THRESHOLD * 26000)
    self.assertIsNotNone(ex)
    op_names = get_opnames(ex)
    saw_inplace_add = any(
        name in op_names
        for name in ("_BINARY_OP_ADD_INT_INPLACE",
                     "_BINARY_OP_ADD_INT_INPLACE_RIGHT")
    )
    self.assertTrue(saw_inplace_add,
                    "Expected an inplace add for unique intermediate results")
3540+
33933541
def test_load_attr_instance_value(self):
33943542
def testfunc(n):
33953543
class C():
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
Optimize compact integer arithmetic in the JIT by mutating
uniquely-referenced operands in place, avoiding allocation of a new int
object. Speeds up the pyperformance ``spectral_norm`` benchmark by ~10%.

Python/bytecodes.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,63 @@ dummy_func(
708708
macro(BINARY_OP_SUBTRACT_INT) =
709709
_GUARD_TOS_INT + _GUARD_NOS_INT + unused/5 + _BINARY_OP_SUBTRACT_INT + _POP_TOP_INT + _POP_TOP_INT;
710710

// Inplace compact int ops: mutate the uniquely-referenced operand
// when possible. The op handles decref of TARGET internally so
// the following _POP_TOP_INT becomes _POP_TOP_NOP. Tier 2 only.
714+
// Tier-2-only uop: compact-int add that mutates the LEFT operand in
// place (INT_INPLACE_OP's third argument selects the mutation target;
// _PyCompactLong_Add is the non-mutating fallback).  A NULL
// _int_inplace_res means the in-place path could not be taken and
// EXIT_IF bails out of this specialized path.  The operands are
// re-pushed as `l`/`r` for the following pop uops; per the block
// comment above, the op decrefs the mutated target internally.
tier2 op(_BINARY_OP_ADD_INT_INPLACE, (left, right -- res, l, r)) {
715+
INT_INPLACE_OP(left, right, left, +, _PyCompactLong_Add);
716+
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
717+
res = _int_inplace_res;
718+
l = left;
719+
r = right;
720+
INPUTS_DEAD();
721+
}
722+
723+
// Tier-2-only uop: compact-int subtract mutating the LEFT operand in
// place; _PyCompactLong_Subtract is the non-mutating fallback.
// NULL result => in-place path unavailable, EXIT_IF leaves the
// specialized path.  Mirrors _BINARY_OP_ADD_INT_INPLACE otherwise.
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE, (left, right -- res, l, r)) {
724+
INT_INPLACE_OP(left, right, left, -, _PyCompactLong_Subtract);
725+
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
726+
res = _int_inplace_res;
727+
l = left;
728+
r = right;
729+
INPUTS_DEAD();
730+
}
731+
732+
// Tier-2-only uop: compact-int multiply mutating the LEFT operand in
// place; _PyCompactLong_Multiply is the non-mutating fallback.
// NULL result => in-place path unavailable, EXIT_IF leaves the
// specialized path.  Mirrors _BINARY_OP_ADD_INT_INPLACE otherwise.
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE, (left, right -- res, l, r)) {
733+
INT_INPLACE_OP(left, right, left, *, _PyCompactLong_Multiply);
734+
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
735+
res = _int_inplace_res;
736+
l = left;
737+
r = right;
738+
INPUTS_DEAD();
739+
}
740+
741+
// _RIGHT variant of _BINARY_OP_ADD_INT_INPLACE: the mutation target is
// the RIGHT operand (third argument to INT_INPLACE_OP), used when the
// optimizer proved the right-hand value is uniquely referenced.
tier2 op(_BINARY_OP_ADD_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
742+
INT_INPLACE_OP(left, right, right, +, _PyCompactLong_Add);
743+
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
744+
res = _int_inplace_res;
745+
l = left;
746+
r = right;
747+
INPUTS_DEAD();
748+
}
749+
750+
// _RIGHT variant of _BINARY_OP_SUBTRACT_INT_INPLACE: mutates the
// uniquely-referenced RIGHT operand in place instead of the left.
tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
751+
INT_INPLACE_OP(left, right, right, -, _PyCompactLong_Subtract);
752+
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
753+
res = _int_inplace_res;
754+
l = left;
755+
r = right;
756+
INPUTS_DEAD();
757+
}
758+
759+
// _RIGHT variant of _BINARY_OP_MULTIPLY_INT_INPLACE: mutates the
// uniquely-referenced RIGHT operand in place instead of the left.
tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, (left, right -- res, l, r)) {
760+
INT_INPLACE_OP(left, right, right, *, _PyCompactLong_Multiply);
761+
EXIT_IF(PyStackRef_IsNull(_int_inplace_res));
762+
res = _int_inplace_res;
763+
l = left;
764+
r = right;
765+
INPUTS_DEAD();
766+
}
767+
711768
op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) {
712769
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
713770
EXIT_IF(!PyFloat_CheckExact(left_o));

0 commit comments

Comments
 (0)