Skip to content

Commit 9a1e768

Browse files
committed
rewrite compiler
1 parent c8d9e73 commit 9a1e768

File tree

12 files changed

+3431
-1098
lines changed

12 files changed

+3431
-1098
lines changed

crates/codegen/src/compile.rs

Lines changed: 1624 additions & 374 deletions
Large diffs are not rendered by default.

crates/codegen/src/error.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ pub enum CodegenErrorType {
7676
InvalidYield,
7777
InvalidYieldFrom,
7878
InvalidAwait,
79+
InvalidAsyncFor,
80+
InvalidAsyncWith,
81+
InvalidAsyncComprehension,
7982
AsyncYieldFrom,
8083
AsyncReturnValue,
8184
InvalidFuturePlacement,
@@ -113,6 +116,14 @@ impl fmt::Display for CodegenErrorType {
113116
InvalidYield => write!(f, "'yield' outside function"),
114117
InvalidYieldFrom => write!(f, "'yield from' outside function"),
115118
InvalidAwait => write!(f, "'await' outside async function"),
119+
InvalidAsyncFor => write!(f, "'async for' outside async function"),
120+
InvalidAsyncWith => write!(f, "'async with' outside async function"),
121+
InvalidAsyncComprehension => {
122+
write!(
123+
f,
124+
"asynchronous comprehension outside of an asynchronous function"
125+
)
126+
}
116127
AsyncYieldFrom => write!(f, "'yield from' inside async function"),
117128
AsyncReturnValue => {
118129
write!(f, "'return' with value inside async generator")

crates/codegen/src/ir.rs

Lines changed: 154 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ use crate::{IndexMap, IndexSet, error::InternalError};
44
use rustpython_compiler_core::{
55
OneIndexed, SourceLocation,
66
bytecode::{
7-
CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, InstrDisplayContext, Instruction,
8-
Label, OpArg, PyCodeLocationInfoKind,
7+
CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, ExceptionTableEntry,
8+
InstrDisplayContext, Instruction, Label, OpArg, PyCodeLocationInfoKind,
9+
encode_exception_table,
910
},
1011
};
1112

@@ -88,6 +89,18 @@ pub struct InstructionInfo {
8889
pub target: BlockIdx,
8990
pub location: SourceLocation,
9091
pub end_location: SourceLocation,
92+
pub except_handler: Option<ExceptHandlerInfo>,
93+
}
94+
95+
/// Exception handler information for an instruction.
///
/// Stored per instruction in `InstructionInfo::except_handler`. Consecutive instructions
/// whose handler block, stack depth and `preserve_lasti` flag all agree are merged into a
/// single exception table entry when the table is generated; instructions without a
/// handler produce no entry.
96+
#[derive(Debug, Clone)]
97+
pub struct ExceptHandlerInfo {
98+
/// Block to jump to when an exception occurs while the instruction executes.
99+
pub handler_block: BlockIdx,
100+
/// Stack depth the stack is unwound to before the handler is entered. The exception
/// (and lasti, when `preserve_lasti` is set) is pushed on top of this depth, so the
/// handler block itself starts one or two slots deeper.
101+
pub stack_depth: u32,
102+
/// Whether lasti is pushed before the exception when the handler is entered
/// (adds one extra stack slot at handler entry).
103+
pub preserve_lasti: bool,
91104
}
92105

93106
// spell-checker:ignore petgraph
@@ -176,12 +189,19 @@ impl CodeInfo {
176189
let mut locations = Vec::new();
177190

178191
let mut block_to_offset = vec![Label(0); blocks.len()];
192+
// block_to_index: maps block idx to instruction index (for exception table)
193+
// This is the index into the final instructions array, including EXTENDED_ARG
194+
let mut block_to_index = vec![0u32; blocks.len()];
179195
loop {
180196
let mut num_instructions = 0;
181197
for (idx, block) in iter_blocks(&blocks) {
182198
block_to_offset[idx.idx()] = Label(num_instructions as u32);
199+
// block_to_index uses the same value as block_to_offset but as u32
200+
// because lasti in frame.rs is the index into instructions array
201+
// and instructions array index == byte offset (each instruction is 1 CodeUnit)
202+
block_to_index[idx.idx()] = num_instructions as u32;
183203
for instr in &block.instructions {
184-
num_instructions += instr.arg.instr_size()
204+
num_instructions += instr.arg.instr_size();
185205
}
186206
}
187207

@@ -228,6 +248,9 @@ impl CodeInfo {
228248
opts.debug_ranges,
229249
);
230250

251+
// Generate exception table before moving source_path
252+
let exceptiontable = generate_exception_table(&blocks, &block_to_index);
253+
231254
Ok(CodeObject {
232255
flags,
233256
posonlyarg_count,
@@ -248,7 +271,7 @@ impl CodeInfo {
248271
freevars: freevar_cache.into_iter().collect(),
249272
cell2arg,
250273
linetable,
251-
exceptiontable: Box::new([]), // TODO: Generate actual exception table
274+
exceptiontable,
252275
})
253276
}
254277

@@ -305,12 +328,24 @@ impl CodeInfo {
305328
start_depths[0] = 0;
306329
stack.push(BlockIdx(0));
307330
const DEBUG: bool = false;
308-
'process_blocks: while let Some(block) = stack.pop() {
309-
let mut depth = start_depths[block.idx()];
331+
// Global iteration limit as safety guard
332+
// The algorithm is monotonic (depths only increase), so it should converge quickly.
333+
// Max iterations = blocks * max_possible_depth_increases per block
334+
let max_iterations = self.blocks.len() * 100;
335+
let mut iterations = 0usize;
336+
'process_blocks: while let Some(block_idx) = stack.pop() {
337+
iterations += 1;
338+
if iterations > max_iterations {
339+
// Safety guard: should never happen in valid code
340+
// Return error instead of silently breaking to avoid underestimated stack depth
341+
return Err(InternalError::StackOverflow);
342+
}
343+
let idx = block_idx.idx();
344+
let mut depth = start_depths[idx];
310345
if DEBUG {
311-
eprintln!("===BLOCK {}===", block.0);
346+
eprintln!("===BLOCK {}===", block_idx.0);
312347
}
313-
let block = &self.blocks[block];
348+
let block = &self.blocks[block_idx];
314349
for ins in &block.instructions {
315350
let instr = &ins.instr;
316351
let effect = instr.stack_effect(ins.arg, false);
@@ -336,15 +371,8 @@ impl CodeInfo {
336371
if new_depth > maxdepth {
337372
maxdepth = new_depth
338373
}
339-
// we don't want to worry about Break/Continue, they use unwinding to jump to
340-
// their targets and as such the stack size is taken care of in frame.rs by setting
341-
// it back to the level it was at when SetupLoop was run
342-
if ins.target != BlockIdx::NULL
343-
&& !matches!(
344-
instr,
345-
Instruction::Continue { .. } | Instruction::Break { .. }
346-
)
347-
{
374+
// Process target blocks for branching instructions
375+
if ins.target != BlockIdx::NULL {
348376
let effect = instr.stack_effect(ins.arg, true);
349377
let target_depth = depth.checked_add_signed(effect).ok_or({
350378
if effect < 0 {
@@ -358,6 +386,35 @@ impl CodeInfo {
358386
}
359387
stackdepth_push(&mut stack, &mut start_depths, ins.target, target_depth);
360388
}
389+
// Process exception handler blocks
390+
// When exception occurs, stack is unwound to handler.stack_depth, then:
391+
// - If preserve_lasti: push lasti (+1)
392+
// - Push exception (+1)
393+
// - Handler block starts with PUSH_EXC_INFO as its first instruction
394+
// So the starting depth for the handler block (BEFORE PUSH_EXC_INFO) is:
395+
// handler.stack_depth + preserve_lasti + 1 (exc)
396+
// PUSH_EXC_INFO will then add +1 when the block is processed
397+
if let Some(ref handler) = ins.except_handler {
398+
let handler_depth = handler.stack_depth + 1 + (handler.preserve_lasti as u32); // +1 for exception, +1 for lasti if preserve_lasti
399+
if DEBUG {
400+
eprintln!(
401+
" HANDLER: block={} depth={} (base={} lasti={})",
402+
handler.handler_block.0,
403+
handler_depth,
404+
handler.stack_depth,
405+
handler.preserve_lasti
406+
);
407+
}
408+
if handler_depth > maxdepth {
409+
maxdepth = handler_depth;
410+
}
411+
stackdepth_push(
412+
&mut stack,
413+
&mut start_depths,
414+
handler.handler_block,
415+
handler_depth,
416+
);
417+
}
361418
depth = new_depth;
362419
if instr.unconditional_branch() {
363420
continue 'process_blocks;
@@ -401,8 +458,10 @@ fn stackdepth_push(
401458
target: BlockIdx,
402459
depth: u32,
403460
) {
404-
let block_depth = &mut start_depths[target.idx()];
405-
if *block_depth == u32::MAX || depth > *block_depth {
461+
let idx = target.idx();
462+
let block_depth = &mut start_depths[idx];
463+
if depth > *block_depth || *block_depth == u32::MAX {
464+
// Found a path with higher depth (or first visit): update max and queue
406465
*block_depth = depth;
407466
stack.push(target);
408467
}
@@ -420,7 +479,7 @@ fn iter_blocks(blocks: &[Block]) -> impl Iterator<Item = (BlockIdx, &Block)> + '
420479
})
421480
}
422481

423-
/// Generate CPython 3.11+ format linetable from source locations
482+
/// Generate Python 3.11+ format linetable from source locations
424483
fn generate_linetable(
425484
locations: &[(SourceLocation, SourceLocation)],
426485
first_line: i32,
@@ -564,3 +623,78 @@ fn write_signed_varint(buf: &mut Vec<u8>, val: i32) -> usize {
564623
};
565624
write_varint(buf, uval)
566625
}
626+
627+
/// Generate Python 3.11+ exception table from instruction handler info
628+
fn generate_exception_table(blocks: &[Block], block_to_index: &[u32]) -> Box<[u8]> {
629+
let mut entries: Vec<ExceptionTableEntry> = Vec::new();
630+
let mut current_entry: Option<(ExceptHandlerInfo, u32)> = None; // (handler_info, start_index)
631+
let mut instr_index = 0u32;
632+
633+
// Iterate through all instructions in block order
634+
// instr_index is the index into the final instructions array (including EXTENDED_ARG)
635+
// This matches how frame.rs uses lasti
636+
for (_, block) in iter_blocks(blocks) {
637+
for instr in &block.instructions {
638+
// instr_size includes EXTENDED_ARG instructions
639+
let instr_size = instr.arg.instr_size() as u32;
640+
641+
match (&current_entry, &instr.except_handler) {
642+
// No current entry, no handler - nothing to do
643+
(None, None) => {}
644+
645+
// No current entry, handler starts - begin new entry
646+
(None, Some(handler)) => {
647+
current_entry = Some((handler.clone(), instr_index));
648+
}
649+
650+
// Current entry exists, same handler - continue
651+
(Some((curr_handler, _)), Some(handler))
652+
if curr_handler.handler_block == handler.handler_block
653+
&& curr_handler.stack_depth == handler.stack_depth
654+
&& curr_handler.preserve_lasti == handler.preserve_lasti => {}
655+
656+
// Current entry exists, different handler - finish current, start new
657+
(Some((curr_handler, start)), Some(handler)) => {
658+
let target_index = block_to_index[curr_handler.handler_block.idx()];
659+
entries.push(ExceptionTableEntry::new(
660+
*start,
661+
instr_index,
662+
target_index,
663+
curr_handler.stack_depth as u16,
664+
curr_handler.preserve_lasti,
665+
));
666+
current_entry = Some((handler.clone(), instr_index));
667+
}
668+
669+
// Current entry exists, no handler - finish current entry
670+
(Some((curr_handler, start)), None) => {
671+
let target_index = block_to_index[curr_handler.handler_block.idx()];
672+
entries.push(ExceptionTableEntry::new(
673+
*start,
674+
instr_index,
675+
target_index,
676+
curr_handler.stack_depth as u16,
677+
curr_handler.preserve_lasti,
678+
));
679+
current_entry = None;
680+
}
681+
}
682+
683+
instr_index += instr_size; // Account for EXTENDED_ARG instructions
684+
}
685+
}
686+
687+
// Finish any remaining entry
688+
if let Some((curr_handler, start)) = current_entry {
689+
let target_index = block_to_index[curr_handler.handler_block.idx()];
690+
entries.push(ExceptionTableEntry::new(
691+
start,
692+
instr_index,
693+
target_index,
694+
curr_handler.stack_depth as u16,
695+
curr_handler.preserve_lasti,
696+
));
697+
}
698+
699+
encode_exception_table(&entries)
700+
}

0 commit comments

Comments
 (0)