Skip to content

Commit 6f66df8

Browse files
committed
Implement code object eq/hash, co_lnotab, and invalid bytecode support
- Add Comparable and Hashable traits for PyCode, comparing key attributes (name, argcount, flags, bytecode, consts, etc.)
- Add deprecated co_lnotab property with DeprecationWarning, converting linetable to the old 2-byte pair format
- Fill all 256 Instruction discriminants (add _Unused variants for gaps 121-127 and 212-233) so transmute from any u8 is defined
- Raise SystemError for unknown opcodes in the VM dispatch loop
- Remove @unittest.expectedFailure from test_shrinking_localsplus, test_co_lnotab_is_deprecated, test_invalid_bytecode
- Restore @requires_debug_ranges() for test_co_positions_artificial_instructions
1 parent f49af3f commit 6f66df8

File tree

4 files changed

+209
-41
lines changed

4 files changed

+209
-41
lines changed

Lib/test/test_code.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,6 @@ def func():
330330
with self.assertRaises(ValueError):
331331
co.replace(co_nlocals=co.co_nlocals + 1)
332332

333-
# TODO: RUSTPYTHON
334-
@unittest.expectedFailure
335333
def test_shrinking_localsplus(self):
336334
# Check that PyCode_NewWithPosOnlyArgs resizes both
337335
# localsplusnames and localspluskinds, if an argument is a cell.
@@ -347,17 +345,13 @@ def func():
347345
new_code = code = func.__code__.replace(co_linetable=b'')
348346
self.assertEqual(list(new_code.co_lines()), [])
349347

350-
# TODO: RUSTPYTHON
351-
@unittest.expectedFailure
352348
def test_co_lnotab_is_deprecated(self): # TODO: remove in 3.14
353349
def func():
354350
pass
355351

356352
with self.assertWarns(DeprecationWarning):
357353
func.__code__.co_lnotab
358354

359-
# TODO: RUSTPYTHON
360-
@unittest.expectedFailure
361355
def test_invalid_bytecode(self):
362356
def foo():
363357
pass
@@ -373,9 +367,7 @@ def foo():
373367
with self.assertRaisesRegex(SystemError, msg):
374368
foo()
375369

376-
# TODO: RUSTPYTHON
377-
@unittest.expectedFailure
378-
# @requires_debug_ranges()
370+
@requires_debug_ranges()
379371
def test_co_positions_artificial_instructions(self):
380372
import dis
381373

crates/compiler-core/src/bytecode/instruction.rs

Lines changed: 72 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,14 @@ pub enum Instruction {
302302
YieldValue {
303303
arg: Arg<u32>,
304304
} = 120,
305+
// Unused opcodes 121-127 (reserved for future use)
306+
_Unused121 = 121,
307+
_Unused122 = 122,
308+
_Unused123 = 123,
309+
_Unused124 = 124,
310+
_Unused125 = 125,
311+
_Unused126 = 126,
312+
_Unused127 = 127,
305313
// CPython 3.14 RESUME (128)
306314
Resume {
307315
context: Arg<u32>,
@@ -390,6 +398,29 @@ pub enum Instruction {
390398
UnpackSequenceList = 209, // Placeholder
391399
UnpackSequenceTuple = 210, // Placeholder
392400
UnpackSequenceTwoTuple = 211, // Placeholder
401+
// Unused opcodes 212-233 (reserved for future use)
402+
_Unused212 = 212,
403+
_Unused213 = 213,
404+
_Unused214 = 214,
405+
_Unused215 = 215,
406+
_Unused216 = 216,
407+
_Unused217 = 217,
408+
_Unused218 = 218,
409+
_Unused219 = 219,
410+
_Unused220 = 220,
411+
_Unused221 = 221,
412+
_Unused222 = 222,
413+
_Unused223 = 223,
414+
_Unused224 = 224,
415+
_Unused225 = 225,
416+
_Unused226 = 226,
417+
_Unused227 = 227,
418+
_Unused228 = 228,
419+
_Unused229 = 229,
420+
_Unused230 = 230,
421+
_Unused231 = 231,
422+
_Unused232 = 232,
423+
_Unused233 = 233,
393424
// CPython 3.14 instrumented opcodes (234-254)
394425
InstrumentedEndFor = 234,
395426
InstrumentedPopIter = 235,
@@ -430,36 +461,47 @@ impl TryFrom<u8> for Instruction {
430461

431462
#[inline]
432463
fn try_from(value: u8) -> Result<Self, MarshalError> {
433-
// CPython-compatible opcodes (0-120)
434-
let cpython_start = u8::from(Self::Cache);
435-
let cpython_end = u8::from(Self::YieldValue { arg: Arg::marker() });
436-
437-
// Resume has a non-contiguous opcode (128)
438-
let resume_id = u8::from(Self::Resume {
439-
context: Arg::marker(),
440-
});
441-
let enter_executor_id = u8::from(Self::EnterExecutor);
442-
443-
let specialized_start = u8::from(Self::BinaryOpAddFloat);
444-
let specialized_end = u8::from(Self::UnpackSequenceTwoTuple);
445-
446-
let instrumented_start = u8::from(Self::InstrumentedEndFor);
447-
let instrumented_end = u8::from(Self::InstrumentedLine);
448-
449-
// No RustPython-only opcodes anymore - all opcodes match CPython 3.14
450-
let custom_ops: &[u8] = &[];
464+
// All 256 values are valid Instruction discriminants.
465+
Ok(unsafe { mem::transmute::<u8, Self>(value) })
466+
}
467+
}
451468

452-
if (cpython_start..=cpython_end).contains(&value)
453-
|| value == resume_id
454-
|| value == enter_executor_id
455-
|| custom_ops.contains(&value)
456-
|| (specialized_start..=specialized_end).contains(&value)
457-
|| (instrumented_start..=instrumented_end).contains(&value)
458-
{
459-
Ok(unsafe { mem::transmute::<u8, Self>(value) })
460-
} else {
461-
Err(Self::Error::InvalidBytecode)
462-
}
469+
impl Instruction {
470+
/// Returns `true` if this opcode is a known, valid instruction.
471+
/// Unused/reserved opcodes return `false`.
472+
pub fn is_valid(self) -> bool {
473+
!matches!(
474+
self,
475+
Self::_Unused121
476+
| Self::_Unused122
477+
| Self::_Unused123
478+
| Self::_Unused124
479+
| Self::_Unused125
480+
| Self::_Unused126
481+
| Self::_Unused127
482+
| Self::_Unused212
483+
| Self::_Unused213
484+
| Self::_Unused214
485+
| Self::_Unused215
486+
| Self::_Unused216
487+
| Self::_Unused217
488+
| Self::_Unused218
489+
| Self::_Unused219
490+
| Self::_Unused220
491+
| Self::_Unused221
492+
| Self::_Unused222
493+
| Self::_Unused223
494+
| Self::_Unused224
495+
| Self::_Unused225
496+
| Self::_Unused226
497+
| Self::_Unused227
498+
| Self::_Unused228
499+
| Self::_Unused229
500+
| Self::_Unused230
501+
| Self::_Unused231
502+
| Self::_Unused232
503+
| Self::_Unused233
504+
)
463505
}
464506
}
465507

@@ -1081,6 +1123,7 @@ impl InstructionMetadata for Instruction {
10811123
Self::UnpackSequenceTwoTuple => (2, 1),
10821124
Self::WithExceptStart => (6, 5),
10831125
Self::YieldValue { .. } => (1, 1),
1126+
_ => (0, 0), // Unused/reserved opcodes
10841127
};
10851128

10861129
debug_assert!((0..=i32::MAX).contains(&pushed));

crates/vm/src/builtins/code.rs

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::{
1010
convert::{ToPyException, ToPyObject},
1111
frozen,
1212
function::OptionalArg,
13-
types::{Constructor, Representable},
13+
types::{Comparable, Constructor, Hashable, Representable},
1414
};
1515
use alloc::fmt;
1616
use core::{
@@ -447,6 +447,75 @@ impl Representable for PyCode {
447447
}
448448
}
449449

450+
impl Comparable for PyCode {
451+
fn cmp(
452+
zelf: &Py<Self>,
453+
other: &PyObject,
454+
op: crate::types::PyComparisonOp,
455+
vm: &VirtualMachine,
456+
) -> PyResult<crate::function::PyComparisonValue> {
457+
op.eq_only(|| {
458+
let other = class_or_notimplemented!(Self, other);
459+
let a = &zelf.code;
460+
let b = &other.code;
461+
let eq = a.obj_name == b.obj_name
462+
&& a.arg_count == b.arg_count
463+
&& a.posonlyarg_count == b.posonlyarg_count
464+
&& a.kwonlyarg_count == b.kwonlyarg_count
465+
&& a.flags == b.flags
466+
&& a.first_line_number == b.first_line_number
467+
&& a.instructions.original_bytes() == b.instructions.original_bytes()
468+
&& a.linetable == b.linetable
469+
&& a.exceptiontable == b.exceptiontable
470+
&& a.names == b.names
471+
&& a.varnames == b.varnames
472+
&& a.freevars == b.freevars
473+
&& a.cellvars == b.cellvars
474+
&& {
475+
let a_consts: Vec<_> = a.constants.iter().map(|c| c.0.clone()).collect();
476+
let b_consts: Vec<_> = b.constants.iter().map(|c| c.0.clone()).collect();
477+
if a_consts.len() != b_consts.len() {
478+
false
479+
} else {
480+
let mut eq = true;
481+
for (ac, bc) in a_consts.iter().zip(b_consts.iter()) {
482+
if !vm.bool_eq(ac, bc)? {
483+
eq = false;
484+
break;
485+
}
486+
}
487+
eq
488+
}
489+
};
490+
Ok(eq.into())
491+
})
492+
}
493+
}
494+
495+
impl Hashable for PyCode {
496+
fn hash(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<crate::common::hash::PyHash> {
497+
let code = &zelf.code;
498+
// Hash a tuple of key attributes, matching CPython's code_hash
499+
let tuple = vm.ctx.new_tuple(vec![
500+
vm.ctx.new_str(code.obj_name.as_str()).into(),
501+
vm.ctx.new_int(code.arg_count).into(),
502+
vm.ctx.new_int(code.posonlyarg_count).into(),
503+
vm.ctx.new_int(code.kwonlyarg_count).into(),
504+
vm.ctx.new_int(code.varnames.len()).into(),
505+
vm.ctx.new_int(code.flags.bits()).into(),
506+
vm.ctx
507+
.new_int(code.first_line_number.map_or(0, |n| n.get()) as i64)
508+
.into(),
509+
vm.ctx.new_bytes(code.instructions.original_bytes()).into(),
510+
{
511+
let consts: Vec<_> = code.constants.iter().map(|c| c.0.clone()).collect();
512+
vm.ctx.new_tuple(consts).into()
513+
},
514+
]);
515+
tuple.as_object().hash(vm)
516+
}
517+
}
518+
450519
// Arguments for code object constructor
451520
#[derive(FromArgs)]
452521
pub struct PyCodeNewArgs {
@@ -595,7 +664,7 @@ impl Constructor for PyCode {
595664
}
596665
}
597666

598-
#[pyclass(with(Representable, Constructor), flags(HAS_WEAKREF))]
667+
#[pyclass(with(Representable, Constructor, Comparable, Hashable), flags(HAS_WEAKREF))]
599668
impl PyCode {
600669
#[pygetset]
601670
const fn co_posonlyargcount(&self) -> usize {
@@ -721,6 +790,63 @@ impl PyCode {
721790
vm.ctx.new_bytes(self.code.exceptiontable.to_vec())
722791
}
723792

793+
#[pygetset]
794+
pub fn co_lnotab(&self, vm: &VirtualMachine) -> PyResult<crate::builtins::PyBytesRef> {
795+
crate::warn::warn(
796+
vm.ctx
797+
.new_str("co_lnotab is deprecated, use co_lines instead.")
798+
.into(),
799+
Some(vm.ctx.exceptions.deprecation_warning.to_owned()),
800+
1,
801+
None,
802+
vm,
803+
)?;
804+
// Convert linetable to the old lnotab format: pairs of (addr_incr, line_incr)
805+
let linetable = self.code.linetable.as_ref();
806+
let mut lnotab = Vec::new();
807+
if !linetable.is_empty() {
808+
let first_line = self.code.first_line_number.map_or(0, |n| n.get() as i32);
809+
let mut range = PyCodeAddressRange::new(linetable, first_line);
810+
let mut prev_addr: i32 = 0;
811+
let mut prev_line = first_line;
812+
while range.advance() {
813+
let addr = range.ar_start;
814+
let line = range.ar_line;
815+
if line == -1 {
816+
continue;
817+
}
818+
let mut addr_incr = addr - prev_addr;
819+
let mut line_incr = line - prev_line;
820+
// Emit addr_incr in chunks of 255
821+
while addr_incr > 255 {
822+
lnotab.push(255u8);
823+
lnotab.push(0u8);
824+
addr_incr -= 255;
825+
}
826+
// Emit line_incr in chunks (signed, -128..127 per step)
827+
while line_incr > 127 {
828+
lnotab.push(addr_incr as u8);
829+
lnotab.push(127u8);
830+
line_incr -= 127;
831+
addr_incr = 0;
832+
}
833+
while line_incr < -128 {
834+
lnotab.push(addr_incr as u8);
835+
lnotab.push((-128i8) as u8);
836+
line_incr += 128;
837+
addr_incr = 0;
838+
}
839+
if addr_incr != 0 || line_incr != 0 {
840+
lnotab.push(addr_incr as u8);
841+
lnotab.push(line_incr as u8);
842+
}
843+
prev_addr = addr;
844+
prev_line = line;
845+
}
846+
}
847+
Ok(vm.ctx.new_bytes(lnotab))
848+
}
849+
724850
#[pymethod]
725851
pub fn co_lines(&self, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
726852
// TODO: Implement lazy iterator (lineiterator) like CPython for better performance

crates/vm/src/frame.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5984,7 +5984,14 @@ impl ExecutingFrame<'_> {
59845984
result
59855985
}
59865986
_ => {
5987-
unreachable!("{instruction:?} instruction should not be executed")
5987+
if !instruction.is_valid() {
5988+
Err(vm.new_system_error(format!(
5989+
"unknown opcode {}",
5990+
u8::from(instruction)
5991+
)))
5992+
} else {
5993+
unreachable!("{instruction:?} instruction should not be executed")
5994+
}
59885995
}
59895996
}
59905997
}

0 commit comments

Comments
 (0)