From 22a926f7493a1c0a2d40c068e53821c870bb0930 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Thu, 30 Oct 2025 03:37:05 +0200 Subject: [PATCH 01/23] Add `--dump-spirt` for dumping only the final SPIR-T module (unlike `--dump-spirt-passes`). --- .../rustc_codegen_spirv/src/codegen_cx/mod.rs | 29 ++++++++++++-- crates/rustc_codegen_spirv/src/linker/mod.rs | 40 ++++++++++++++----- docs/src/codegen-args.md | 8 ++++ 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs index c8acc4bc1d..1d27a5c4e9 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs @@ -331,16 +331,19 @@ pub struct CodegenArgs { impl CodegenArgs { pub fn from_session(sess: &Session) -> Self { - match CodegenArgs::parse(&sess.opts.cg.llvm_args) { + match Self::parse(sess, &sess.opts.cg.llvm_args) { Ok(ok) => ok, + + // FIXME(eddyb) this should mention `RUSTGPU_CODEGEN_ARGS`, just + // like how `RUSTGPU_CODEGEN_ARGS=--help` is already special-cased. Err(err) => sess .dcx() .fatal(format!("Unable to parse llvm-args: {err}")), } } - // FIXME(eddyb) `structopt` would come a long way to making this nicer. - pub fn parse(args: &[String]) -> Result { + // FIXME(eddyb) switch all of this over to `clap`. + pub fn parse(sess: &Session, args: &[String]) -> Result { use rustc_session::getopts; // FIXME(eddyb) figure out what casing ("Foo bar" vs "foo bar") to use @@ -459,6 +462,12 @@ impl CodegenArgs { "dump the SPIR-T module across passes, to a (pair of) file(s) in DIR", "DIR", ); + opts.optopt( + "", + "dump-spirt", + "dump the final SPIR-T module, to a (pair of) file(s) in DIR", + "DIR", + ); opts.optflag( "", "spirt-strip-custom-debuginfo-from-dumps", @@ -627,7 +636,19 @@ impl CodegenArgs { dump_pre_inline: matches_opt_dump_dir_path("dump-pre-inline"), dump_post_inline: matches_opt_dump_dir_path("dump-post-inline"), dump_post_split: matches_opt_dump_dir_path("dump-post-split"), - dump_spirt_passes: matches_opt_dump_dir_path("dump-spirt-passes"), + dump_spirt: match ["dump-spirt-passes", "dump-spirt"].map(matches_opt_dump_dir_path) { + [Some(dump_spirt_passes), dump_spirt] => { + if dump_spirt.is_some() { + sess.dcx() + .warn("`--dump-spirt` ignored in favor of `--dump-spirt-passes`"); + } + Some((dump_spirt_passes, crate::linker::DumpSpirtMode::AllPasses)) + } + [None, Some(dump_spirt)] => { + Some((dump_spirt, crate::linker::DumpSpirtMode::OnlyFinal)) + } + [None, None] => None, + }, spirt_strip_custom_debuginfo_from_dumps: matches .opt_present("spirt-strip-custom-debuginfo-from-dumps"), spirt_keep_debug_sources_in_dumps: matches diff --git a/crates/rustc_codegen_spirv/src/linker/mod.rs b/crates/rustc_codegen_spirv/src/linker/mod.rs index d289c8e5fc..e0d612cab2 100644 --- a/crates/rustc_codegen_spirv/src/linker/mod.rs +++ b/crates/rustc_codegen_spirv/src/linker/mod.rs @@ -61,13 +61,19 @@ pub struct Options { pub dump_pre_inline: Option, pub dump_post_inline: Option, pub dump_post_split: Option, - pub dump_spirt_passes: Option, + pub dump_spirt: Option<(PathBuf, DumpSpirtMode)>, pub spirt_strip_custom_debuginfo_from_dumps: bool, pub spirt_keep_debug_sources_in_dumps: bool, pub spirt_keep_unstructured_cfg_in_dumps: bool, pub specializer_dump_instances: Option, } +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum DumpSpirtMode { + AllPasses, + OnlyFinal, +} + pub enum LinkResult { SingleModule(Box), MultipleModules { @@ -520,7 +526,7 @@ pub fn link( 
drop(timer); let pass_name = dump_guard.in_progress_pass_name.take().unwrap(); if let Some(module) = module - && opts.dump_spirt_passes.is_some() + && matches!(opts.dump_spirt, Some((_, DumpSpirtMode::AllPasses))) { dump_guard .per_pass_module_for_dumping @@ -801,9 +807,9 @@ impl Drop for SpirtDumpGuard<'_> { let mut dump_spirt_file_path = self.linker_options - .dump_spirt_passes + .dump_spirt .as_ref() - .map(|dump_dir| { + .map(|(dump_dir, _)| { dump_dir .join(self.disambiguated_crate_name_for_dumps) .with_extension("spirt") @@ -815,10 +821,6 @@ impl Drop for SpirtDumpGuard<'_> { // but that requires keeping around e.g. the initial SPIR-V for longer, // and probably invoking the "SPIR-T pipeline" here, as looping is hard). if self.any_spirt_bugs && dump_spirt_file_path.is_none() { - if self.per_pass_module_for_dumping.is_empty() { - self.per_pass_module_for_dumping - .push(("".into(), self.module.clone())); - } dump_spirt_file_path = Some(self.outputs.temp_path_for_diagnostic("spirt")); } @@ -826,6 +828,11 @@ impl Drop for SpirtDumpGuard<'_> { return; }; + if self.per_pass_module_for_dumping.is_empty() { + self.per_pass_module_for_dumping + .push(("".into(), self.module.clone())); + } + for (_, module) in &mut self.per_pass_module_for_dumping { // FIXME(eddyb) consider catching panics in this? self.linker_options.spirt_cleanup_for_dumping(module); @@ -835,7 +842,16 @@ impl Drop for SpirtDumpGuard<'_> { let versions = self .per_pass_module_for_dumping .iter() - .map(|(pass_name, module)| (format!("after {pass_name}"), module)); + .map(|(pass_name, module)| { + ( + if pass_name.is_empty() { + "".into() + } else { + format!("after {pass_name}") + }, + module, + ) + }); let mut panicked_printing_after_passes = None; for truncate_version_count in (1..=versions.len()).rev() { @@ -896,7 +912,11 @@ impl Drop for SpirtDumpGuard<'_> { "pretty-printed SPIR-T was saved to {}.html", dump_spirt_file_path.display() )); - if self.linker_options.dump_spirt_passes.is_none() { + let is_dumping_spirt_passes = matches!( + self.linker_options.dump_spirt, + Some((_, DumpSpirtMode::AllPasses)) + ); + if !is_dumping_spirt_passes { note.help("re-run with `RUSTGPU_CODEGEN_ARGS=\"--dump-spirt-passes=$PWD\"` for more details"); } note.note("pretty-printed SPIR-T is preferred when reporting Rust-GPU issues"); diff --git a/docs/src/codegen-args.md b/docs/src/codegen-args.md index 71fb69d6d2..687baf0cf0 100644 --- a/docs/src/codegen-args.md +++ b/docs/src/codegen-args.md @@ -175,6 +175,14 @@ _Note: passes that are not already enabled by default are considered experimenta Dump the `SPIR-🇹` module across passes (i.e. all of the versions before/after each pass), as a combined report, to a pair of files (`.spirt` and `.spirt.html`) in `DIR`. (the `.spirt.html` version of the report is the recommended form for viewing, as it uses tabling for versions, syntax-highlighting-like styling, and use->def linking) +Mutually exclusive with `--dump-spirt` (this takes precedence over that). + +### `--dump-spirt DIR` + +Dump the `SPIR-🇹` module, similar to `--dump-spirt-passes`, but only the final version. + +Mutually exclusive with `--dump-spirt-passes` (which takes precedence over this). + ### `--spirt-strip-custom-debuginfo-from-dumps` When dumping (pretty-printed) `SPIR-🇹` (e.g. 
with `--dump-spirt-passes`), strip From bf3a0ee79bbd9bc18150ec17d7172c1ad6026bf3 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 28 Oct 2025 13:42:42 +0200 Subject: [PATCH 02/23] builder: use SPIR-V instructions for `checked_{add,sub,mul}` and `saturating_{add,sub}`. --- crates/rustc_codegen_spirv/src/abi.rs | 9 -- .../src/builder/builder_methods.rs | 122 ++++++++++++------ .../src/builder/intrinsics.rs | 46 ++++--- 3 files changed, 107 insertions(+), 70 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/abi.rs b/crates/rustc_codegen_spirv/src/abi.rs index 0c5cf69f4d..e86822fe82 100644 --- a/crates/rustc_codegen_spirv/src/abi.rs +++ b/crates/rustc_codegen_spirv/src/abi.rs @@ -616,15 +616,6 @@ fn trans_aggregate<'tcx>(cx: &CodegenCx<'tcx>, span: Span, ty: TyAndLayout<'tcx> } } -#[cfg_attr( - not(rustc_codegen_spirv_disable_pqp_cg_ssa), - expect( - unused, - reason = "actually used from \ - `>::const_struct`, \ - but `rustc_codegen_ssa` being `pqp_cg_ssa` makes that trait unexported" - ) -)] // returns (field_offsets, size, align) pub fn auto_struct_layout( cx: &CodegenCx<'_>, diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index 7b6de35187..b746ad091a 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -26,6 +26,7 @@ use rustc_codegen_ssa::traits::{ }; use rustc_middle::bug; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; +use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::{self, AtomicOrdering, Ty}; use rustc_span::Span; use rustc_target::callconv::FnAbi; @@ -1721,30 +1722,15 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { fn checked_binop( &mut self, oop: OverflowOp, - ty: Ty<'_>, + ty: Ty<'tcx>, lhs: Self::Value, rhs: Self::Value, ) -> (Self::Value, Self::Value) { - // adopted partially from https://github.com/ziglang/zig/blob/master/src/codegen/spirv.zig - let is_add = match oop { - OverflowOp::Add => true, - OverflowOp::Sub => false, - OverflowOp::Mul => { - // NOTE(eddyb) this needs to be `undef`, not `false`/`true`, because - // we don't want the user's boolean constants to keep the zombie alive. - let bool = SpirvType::Bool.def(self.span(), self); - let overflowed = self.undef(bool); - - let result = (self.mul(lhs, rhs), overflowed); - self.zombie(result.1.def(self), "checked mul is not supported yet"); - return result; - } - }; let signed = match ty.kind() { ty::Int(_) => true, ty::Uint(_) => false, - other => self.fatal(format!( - "Unexpected {} type: {other:#?}", + _ => self.fatal(format!( + "unexpected {} type: {ty}", match oop { OverflowOp::Add => "checked add", OverflowOp::Sub => "checked sub", @@ -1753,13 +1739,17 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { )), }; - let result = if is_add { - self.add(lhs, rhs) - } else { - self.sub(lhs, rhs) - }; + // HACK(eddyb) SPIR-V `OpIAddCarry`/`OpISubBorrow` are specifically for + // unsigned overflow, so signed overflow still needs this custom logic. 
+ if signed && let OverflowOp::Add | OverflowOp::Sub = oop { + let result = match oop { + OverflowOp::Add => self.add(lhs, rhs), + OverflowOp::Sub => self.sub(lhs, rhs), + OverflowOp::Mul => unreachable!(), + }; + + // adopted partially from https://github.com/ziglang/zig/blob/master/src/codegen/spirv.zig - let overflowed = if signed { // when adding, overflow could happen if // - rhs is positive and result < lhs; or // - rhs is negative and result > lhs @@ -1771,30 +1761,80 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { // this is equivalent to (rhs < 0) == (result < lhs) let rhs_lt_zero = self.icmp(IntPredicate::IntSLT, rhs, self.constant_int(rhs.ty, 0)); let result_gt_lhs = self.icmp( - if is_add { - IntPredicate::IntSGT - } else { - IntPredicate::IntSLT + match oop { + OverflowOp::Add => IntPredicate::IntSGT, + OverflowOp::Sub => IntPredicate::IntSLT, + OverflowOp::Mul => unreachable!(), }, result, lhs, ); - self.icmp(IntPredicate::IntEQ, rhs_lt_zero, result_gt_lhs) - } else { - // for unsigned addition, overflow occurred if the result is less than any of the operands. - // for subtraction, overflow occurred if the result is greater. - self.icmp( - if is_add { - IntPredicate::IntULT + + let overflowed = self.icmp(IntPredicate::IntEQ, rhs_lt_zero, result_gt_lhs); + + return (result, overflowed); + } + + let result_type = self.layout_of(ty).spirv_type(self.span(), self); + let pair_result_type = { + let field_types = [result_type, result_type]; + let (field_offsets, size, align) = crate::abi::auto_struct_layout(self, &field_types); + SpirvType::Adt { + def_id: None, + size, + align, + field_types: &field_types, + field_offsets: &field_offsets, + field_names: None, + } + .def(self.span(), self) + }; + + let lhs = lhs.def(self); + let rhs = rhs.def(self); + let pair_result = match oop { + OverflowOp::Add => self + .emit() + .i_add_carry(pair_result_type, None, lhs, rhs) + .unwrap(), + OverflowOp::Sub => self + .emit() + .i_sub_borrow(pair_result_type, None, lhs, rhs) + .unwrap(), + OverflowOp::Mul => { + if signed { + self.emit() + .s_mul_extended(pair_result_type, None, lhs, rhs) + .unwrap() } else { - IntPredicate::IntUGT - }, - result, - lhs, - ) + self.emit() + .u_mul_extended(pair_result_type, None, lhs, rhs) + .unwrap() + } + } + } + .with_type(pair_result_type); + let result_lo = self.extract_value(pair_result, 0); + let result_hi = self.extract_value(pair_result, 1); + + // HACK(eddyb) SPIR-V lacks any `(T, T) -> (T, bool)` instructions, + // so instead `result_hi` is compared with the value expected in the + // non-overflow case (`0`, or `-1` for negative signed multiply result). + let expected_nonoverflowing_hi = match (oop, signed) { + (OverflowOp::Add | OverflowOp::Sub, _) | (OverflowOp::Mul, false) => { + self.const_uint(result_type, 0) + } + (OverflowOp::Mul, true) => { + // HACK(eddyb) `(x: iN) >> (N - 1)` will spread the sign bit + // across all `N` bits of `iN`, and should be equivalent to + // `if x < 0 { -1 } else { 0 }`, without needing compare+select). + let result_width = u32::try_from(self.int_width(result_type)).unwrap(); + self.ashr(result_lo, self.const_u32(result_width - 1)) + } }; + let overflowed = self.icmp(IntPredicate::IntNE, result_hi, expected_nonoverflowing_hi); - (result, overflowed) + (result_lo, overflowed) } // rustc has the concept of an immediate vs. 
memory type - bools are compiled to LLVM bools as diff --git a/crates/rustc_codegen_spirv/src/builder/intrinsics.rs b/crates/rustc_codegen_spirv/src/builder/intrinsics.rs index 0fcaec8513..b4bedd8d56 100644 --- a/crates/rustc_codegen_spirv/src/builder/intrinsics.rs +++ b/crates/rustc_codegen_spirv/src/builder/intrinsics.rs @@ -115,33 +115,39 @@ impl<'a, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'tcx> { sym::saturating_add => { assert_eq!(arg_tys[0], arg_tys[1]); - let result = match arg_tys[0].kind() { - TyKind::Int(_) | TyKind::Uint(_) => { - self.add(args[0].immediate(), args[1].immediate()) - } - TyKind::Float(_) => self.fadd(args[0].immediate(), args[1].immediate()), + match arg_tys[0].kind() { + TyKind::Int(_) | TyKind::Uint(_) => self + .emit() + .i_add_sat_intel( + ret_ty, + None, + args[0].immediate().def(self), + args[1].immediate().def(self), + ) + .unwrap() + .with_type(ret_ty), other => self.fatal(format!( - "Unimplemented saturating_add intrinsic type: {other:#?}" + "unimplemented saturating_add intrinsic type: {other:#?}" )), - }; - // TODO: Implement this - self.zombie(result.def(self), "saturating_add is not implemented yet"); - result + } } sym::saturating_sub => { assert_eq!(arg_tys[0], arg_tys[1]); - let result = match &arg_tys[0].kind() { - TyKind::Int(_) | TyKind::Uint(_) => { - self.sub(args[0].immediate(), args[1].immediate()) - } - TyKind::Float(_) => self.fsub(args[0].immediate(), args[1].immediate()), + match &arg_tys[0].kind() { + TyKind::Int(_) | TyKind::Uint(_) => self + .emit() + .i_sub_sat_intel( + ret_ty, + None, + args[0].immediate().def(self), + args[1].immediate().def(self), + ) + .unwrap() + .with_type(ret_ty), other => self.fatal(format!( - "Unimplemented saturating_sub intrinsic type: {other:#?}" + "unimplemented saturating_sub intrinsic type: {other:#?}" )), - }; - // TODO: Implement this - self.zombie(result.def(self), "saturating_sub is not implemented yet"); - result + } } sym::sqrtf32 | sym::sqrtf64 | sym::sqrtf128 => { From 583666b68568d206f7666e3d71aeb24c3a8d2e2a Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Mon, 13 Oct 2025 07:14:12 +0300 Subject: [PATCH 03/23] builder: implement pointer atomics (missing in SPIR-V) via integers. --- .../src/builder/builder_methods.rs | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index b746ad091a..4d5b3b9390 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -1894,7 +1894,12 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { order: AtomicOrdering, _size: Size, ) -> Self::Value { - let (ptr, access_ty) = self.adjust_pointer_for_typed_access(ptr, ty); + // HACK(eddyb) SPIR-V lacks pointer atomics, have to use integers instead. + let atomic_ty = match self.lookup_type(ty) { + SpirvType::Pointer { .. } => self.type_usize(), + _ => ty, + }; + let (ptr, access_ty) = self.adjust_pointer_for_typed_access(ptr, atomic_ty); // TODO: Default to device scope let memory = self.constant_u32(self.span(), Scope::Device as u32); @@ -2031,7 +2036,12 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { order: AtomicOrdering, _size: Size, ) { - let (ptr, access_ty) = self.adjust_pointer_for_typed_access(ptr, val.ty); + // HACK(eddyb) SPIR-V lacks pointer atomics, have to use integers instead. 
+ let atomic_ty = match self.lookup_type(val.ty) { + SpirvType::Pointer { .. } => self.type_usize(), + _ => val.ty, + }; + let (ptr, access_ty) = self.adjust_pointer_for_typed_access(ptr, atomic_ty); let val = self.bitcast(val, access_ty); // TODO: Default to device scope @@ -3131,7 +3141,12 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { assert_ty_eq!(self, cmp.ty, src.ty); let ty = src.ty; - let (dst, access_ty) = self.adjust_pointer_for_typed_access(dst, ty); + // HACK(eddyb) SPIR-V lacks pointer atomics, have to use integers instead. + let atomic_ty = match self.lookup_type(ty) { + SpirvType::Pointer { .. } => self.type_usize(), + _ => ty, + }; + let (dst, access_ty) = self.adjust_pointer_for_typed_access(dst, atomic_ty); let cmp = self.bitcast(cmp, access_ty); let src = self.bitcast(src, access_ty); @@ -3157,7 +3172,7 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { .with_type(access_ty); let val = self.bitcast(result, ty); - let success = self.icmp(IntPredicate::IntEQ, val, cmp); + let success = self.icmp(IntPredicate::IntEQ, result, cmp); (val, success) } @@ -3171,7 +3186,12 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { ) -> Self::Value { let ty = src.ty; - let (dst, access_ty) = self.adjust_pointer_for_typed_access(dst, ty); + // HACK(eddyb) SPIR-V lacks pointer atomics, have to use integers instead. + let atomic_ty = match self.lookup_type(ty) { + SpirvType::Pointer { .. } => self.type_usize(), + _ => ty, + }; + let (dst, access_ty) = self.adjust_pointer_for_typed_access(dst, atomic_ty); let src = self.bitcast(src, access_ty); self.validate_atomic(access_ty, dst.def(self)); From 3c3a11864dad6cc5f6c9012c7591a415da46e018 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 04/23] Revert "linker/inline: use `OpPhi` instead of `OpVariable` for return values." This reverts commit fcd1b1e750b6fba7684f348bb2f37477ecc1bcdf. 
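The reverted approach modeled an inlined call's result as an `OpPhi` over all of the callee's return blocks; this revert goes back to the older form, where each inlined `OpReturnValue %v` stores `%v` into a `Function`-storage `OpVariable` hoisted into the caller's entry block, and the block after the call site loads it back as the call's result. A minimal sketch of that per-return rewrite, using the same `rspirv` data representation as the diff below (the free-standing helper and its name are illustrative, not the inliner's actual code):

```rust
use rspirv::dr::{Instruction, Operand};
use rspirv::spirv::{Op, Word};

/// Replace an inlined `OpReturnValue %return_value` terminator with a store
/// into the caller-allocated return variable, plus a branch to the block that
/// follows the call site (which then `OpLoad`s from `return_var`).
fn rewrite_return_value(
    return_value: Word,
    return_var: Word,
    return_jump: Word,
) -> Vec<Instruction> {
    vec![
        // OpStore %return_var %return_value
        Instruction::new(
            Op::Store,
            None,
            None,
            vec![Operand::IdRef(return_var), Operand::IdRef(return_value)],
        ),
        // OpBranch %return_jump
        Instruction::new(Op::Branch, None, None, vec![Operand::IdRef(return_jump)]),
    ]
}
```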
--- .../rustc_codegen_spirv/src/linker/inline.rs | 136 +++++++++--------- .../ui/dis/complex_image_sample_inst.stderr | 21 +-- 2 files changed, 80 insertions(+), 77 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 0ab19bd40f..e2aac8c548 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -94,6 +94,7 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { header, debug_string_source: &mut module.debug_string_source, annotations: &mut module.annotations, + types_global_values: &mut module.types_global_values, legal_globals, @@ -492,6 +493,7 @@ struct Inliner<'a, 'b> { header: &'b mut ModuleHeader, debug_string_source: &'b mut Vec, annotations: &'b mut Vec, + types_global_values: &'b mut Vec, legal_globals: FxHashMap, functions_that_may_abort: FxHashSet, @@ -521,6 +523,29 @@ impl Inliner<'_, '_> { } } + fn ptr_ty(&mut self, pointee: Word) -> Word { + // TODO: This is horribly slow, fix this + let existing = self.types_global_values.iter().find(|inst| { + inst.class.opcode == Op::TypePointer + && inst.operands[0].unwrap_storage_class() == StorageClass::Function + && inst.operands[1].unwrap_id_ref() == pointee + }); + if let Some(existing) = existing { + return existing.result_id.unwrap(); + } + let inst_id = self.id(); + self.types_global_values.push(Instruction::new( + Op::TypePointer, + None, + Some(inst_id), + vec![ + Operand::StorageClass(StorageClass::Function), + Operand::IdRef(pointee), + ], + )); + inst_id + } + fn inline_fn( &mut self, function: &mut Function, @@ -597,19 +622,15 @@ impl Inliner<'_, '_> { .insert(caller.def_id().unwrap()); } - let mut maybe_call_result_phi = { + let call_result_type = { let ty = call_inst.result_type.unwrap(); if ty == self.op_type_void_id { None } else { - Some(Instruction::new( - Op::Phi, - Some(ty), - Some(call_inst.result_id.unwrap()), - vec![], - )) + Some(ty) } }; + let call_result_id = call_inst.result_id.unwrap(); // Get the debug "source location" instruction that applies to the call. let custom_ext_inst_set_import = self.custom_ext_inst_set_import; @@ -646,12 +667,17 @@ impl Inliner<'_, '_> { }); let mut rewrite_rules = callee_parameters.zip(call_arguments).collect(); + let return_variable = if call_result_type.is_some() { + Some(self.id()) + } else { + None + }; let return_jump = self.id(); // Rewrite OpReturns of the callee. let mut inlined_callee_blocks = self.get_inlined_blocks( callee, call_debug_src_loc_inst, - maybe_call_result_phi.as_mut(), + return_variable, return_jump, ); // Clone the IDs of the callee, because otherwise they'd be defined multiple times if the @@ -660,55 +686,6 @@ impl Inliner<'_, '_> { apply_rewrite_rules(&rewrite_rules, &mut inlined_callee_blocks); self.apply_rewrite_for_decorations(&rewrite_rules); - if let Some(call_result_phi) = &mut maybe_call_result_phi { - // HACK(eddyb) new IDs should be generated earlier, to avoid pushing - // callee IDs to `call_result_phi.operands` only to rewrite them here. - for op in &mut call_result_phi.operands { - if let Some(id) = op.id_ref_any_mut() - && let Some(&rewrite) = rewrite_rules.get(id) - { - *id = rewrite; - } - } - - // HACK(eddyb) this special-casing of the single-return case is - // really necessary for passes like `mem2reg` which are not capable - // of skipping through the extraneous `OpPhi`s on their own. - if let [returned_value, _return_block] = &call_result_phi.operands[..] 
{ - let call_result_id = call_result_phi.result_id.unwrap(); - let returned_value_id = returned_value.unwrap_id_ref(); - - maybe_call_result_phi = None; - - // HACK(eddyb) this is a conservative approximation of all the - // instructions that could potentially reference the call result. - let reaching_insts = { - let (pre_call_blocks, call_and_post_call_blocks) = - caller.blocks.split_at_mut(block_idx); - (pre_call_blocks.iter_mut().flat_map(|block| { - block - .instructions - .iter_mut() - .take_while(|inst| inst.class.opcode == Op::Phi) - })) - .chain( - call_and_post_call_blocks - .iter_mut() - .flat_map(|block| &mut block.instructions), - ) - }; - for reaching_inst in reaching_insts { - for op in &mut reaching_inst.operands { - if let Some(id) = op.id_ref_any_mut() - && *id == call_result_id - { - *id = returned_value_id; - } - } - } - } - } - // Split the block containing the `OpFunctionCall` into pre-call vs post-call. let pre_call_block_idx = block_idx; #[expect(unused)] @@ -724,6 +701,18 @@ impl Inliner<'_, '_> { .unwrap(); assert!(call.class.opcode == Op::FunctionCall); + if let Some(call_result_type) = call_result_type { + // Generate the storage space for the return value: Do this *after* the split above, + // because if block_idx=0, inserting a variable here shifts call_index. + let ret_var_inst = Instruction::new( + Op::Variable, + Some(self.ptr_ty(call_result_type)), + Some(return_variable.unwrap()), + vec![Operand::StorageClass(StorageClass::Function)], + ); + self.insert_opvariables(&mut caller.blocks[0], [ret_var_inst]); + } + // Insert non-entry inlined callee blocks just after the pre-call block. let non_entry_inlined_callee_blocks = inlined_callee_blocks.drain(1..); let num_non_entry_inlined_callee_blocks = non_entry_inlined_callee_blocks.len(); @@ -732,9 +721,18 @@ impl Inliner<'_, '_> { non_entry_inlined_callee_blocks, ); - if let Some(call_result_phi) = maybe_call_result_phi { - // Add the `OpPhi` for the call result value, after the inlined function. - post_call_block_insts.insert(0, call_result_phi); + if let Some(call_result_type) = call_result_type { + // Add the load of the result value after the inlined function. Note there's guaranteed no + // OpPhi instructions since we just split this block. + post_call_block_insts.insert( + 0, + Instruction::new( + Op::Load, + Some(call_result_type), + Some(call_result_id), + vec![Operand::IdRef(return_variable.unwrap())], + ), + ); } // Insert the post-call block, after all the inlined callee blocks. 
@@ -901,7 +899,7 @@ impl Inliner<'_, '_> { &mut self, callee: &Function, call_debug_src_loc_inst: Option<&Instruction>, - mut maybe_call_result_phi: Option<&mut Instruction>, + return_variable: Option, return_jump: Word, ) -> Vec { let Self { @@ -999,13 +997,17 @@ impl Inliner<'_, '_> { if let Op::Return | Op::ReturnValue = terminator.class.opcode { if Op::ReturnValue == terminator.class.opcode { let return_value = terminator.operands[0].id_ref_any().unwrap(); - let call_result_phi = maybe_call_result_phi.as_deref_mut().unwrap(); - call_result_phi.operands.extend([ - Operand::IdRef(return_value), - Operand::IdRef(block.label_id().unwrap()), - ]); + block.instructions.push(Instruction::new( + Op::Store, + None, + None, + vec![ + Operand::IdRef(return_variable.unwrap()), + Operand::IdRef(return_value), + ], + )); } else { - assert!(maybe_call_result_phi.is_none()); + assert!(return_variable.is_none()); } terminator = Instruction::new(Op::Branch, None, None, vec![Operand::IdRef(return_jump)]); diff --git a/tests/compiletests/ui/dis/complex_image_sample_inst.stderr b/tests/compiletests/ui/dis/complex_image_sample_inst.stderr index b99b63a736..3d62336afd 100644 --- a/tests/compiletests/ui/dis/complex_image_sample_inst.stderr +++ b/tests/compiletests/ui/dis/complex_image_sample_inst.stderr @@ -3,18 +3,19 @@ %5 = OpFunctionParameter %6 %7 = OpFunctionParameter %6 %8 = OpLabel -%9 = OpCompositeExtract %10 %5 0 -%11 = OpCompositeExtract %10 %5 1 -%12 = OpCompositeConstruct %6 %9 %11 -%13 = OpCompositeExtract %10 %7 0 -%14 = OpCompositeExtract %10 %7 1 -%15 = OpCompositeConstruct %6 %13 %14 -OpLine %16 29 13 +OpLine %9 10 25 +%10 = OpCompositeExtract %11 %5 0 +%12 = OpCompositeExtract %11 %5 1 +%13 = OpCompositeConstruct %6 %10 %12 +%14 = OpCompositeExtract %11 %7 0 +%15 = OpCompositeExtract %11 %7 1 +%16 = OpCompositeConstruct %6 %14 %15 +OpLine %9 29 13 %17 = OpAccessChain %18 %19 %20 -OpLine %16 30 13 +OpLine %9 30 13 %21 = OpLoad %22 %17 -OpLine %16 34 13 -%23 = OpImageSampleProjExplicitLod %2 %21 %4 Grad %12 %15 +OpLine %9 34 13 +%23 = OpImageSampleProjExplicitLod %2 %21 %4 Grad %13 %16 OpNoLine OpReturnValue %23 OpFunctionEnd From 8ae4bae2021788103252760a298c3acaffdd1a44 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 05/23] Revert "WIP: mem2reg speedup" This reverts commit efbf694edef096f01c10482d262882fb3d1711ff. --- crates/rustc_codegen_spirv/src/linker/dce.rs | 11 +-- .../rustc_codegen_spirv/src/linker/mem2reg.rs | 91 ++++++++----------- crates/rustc_codegen_spirv/src/linker/mod.rs | 7 +- 3 files changed, 46 insertions(+), 63 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/dce.rs b/crates/rustc_codegen_spirv/src/linker/dce.rs index 20bcbd3310..4be10c0d5b 100644 --- a/crates/rustc_codegen_spirv/src/linker/dce.rs +++ b/crates/rustc_codegen_spirv/src/linker/dce.rs @@ -7,10 +7,9 @@ //! *references* a rooted thing is also rooted, not the other way around - but that's the basic //! concept. 
-use rspirv::dr::{Block, Function, Instruction, Module, Operand}; +use rspirv::dr::{Function, Instruction, Module, Operand}; use rspirv::spirv::{Decoration, LinkageType, Op, StorageClass, Word}; -use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; -use std::hash::Hash; +use rustc_data_structures::fx::FxIndexSet; pub fn dce(module: &mut Module) { let mut rooted = collect_roots(module); @@ -138,11 +137,11 @@ fn kill_unrooted(module: &mut Module, rooted: &FxIndexSet) { } } -pub fn dce_phi(blocks: &mut FxIndexMap) { +pub fn dce_phi(func: &mut Function) { let mut used = FxIndexSet::default(); loop { let mut changed = false; - for inst in blocks.values().flat_map(|block| &block.instructions) { + for inst in func.all_inst_iter() { if inst.class.opcode != Op::Phi || used.contains(&inst.result_id.unwrap()) { for op in &inst.operands { if let Some(id) = op.id_ref_any() { @@ -155,7 +154,7 @@ pub fn dce_phi(blocks: &mut FxIndexMap) { break; } } - for block in blocks.values_mut() { + for block in &mut func.blocks { block .instructions .retain(|inst| inst.class.opcode != Op::Phi || used.contains(&inst.result_id.unwrap())); diff --git a/crates/rustc_codegen_spirv/src/linker/mem2reg.rs b/crates/rustc_codegen_spirv/src/linker/mem2reg.rs index df2434d63d..16889cbcc3 100644 --- a/crates/rustc_codegen_spirv/src/linker/mem2reg.rs +++ b/crates/rustc_codegen_spirv/src/linker/mem2reg.rs @@ -13,14 +13,10 @@ use super::simple_passes::outgoing_edges; use super::{apply_rewrite_rules, id}; use rspirv::dr::{Block, Function, Instruction, ModuleHeader, Operand}; use rspirv::spirv::{Op, Word}; -use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_middle::bug; use std::collections::hash_map; -// HACK(eddyb) newtype instead of type alias to avoid mistakes. -#[derive(Copy, Clone, PartialEq, Eq, Hash)] -struct LabelId(Word); - pub fn mem2reg( header: &mut ModuleHeader, types_global_values: &mut Vec, @@ -28,16 +24,8 @@ pub fn mem2reg( constants: &FxHashMap, func: &mut Function, ) { - // HACK(eddyb) this ad-hoc indexing might be useful elsewhere as well, but - // it's made completely irrelevant by SPIR-T so only applies to legacy code. 
- let mut blocks: FxIndexMap<_, _> = func - .blocks - .iter_mut() - .map(|block| (LabelId(block.label_id().unwrap()), block)) - .collect(); - - let reachable = compute_reachable(&blocks); - let preds = compute_preds(&blocks, &reachable); + let reachable = compute_reachable(&func.blocks); + let preds = compute_preds(&func.blocks, &reachable); let idom = compute_idom(&preds, &reachable); let dominance_frontier = compute_dominance_frontier(&preds, &idom); loop { @@ -46,27 +34,31 @@ pub fn mem2reg( types_global_values, pointer_to_pointee, constants, - &mut blocks, + &mut func.blocks, &dominance_frontier, ); if !changed { break; } // mem2reg produces minimal SSA form, not pruned, so DCE the dead ones - super::dce::dce_phi(&mut blocks); + super::dce::dce_phi(func); } } -fn compute_reachable(blocks: &FxIndexMap) -> Vec { - fn recurse(blocks: &FxIndexMap, reachable: &mut [bool], block: usize) { +fn label_to_index(blocks: &[Block], id: Word) -> usize { + blocks + .iter() + .position(|b| b.label_id().unwrap() == id) + .unwrap() +} + +fn compute_reachable(blocks: &[Block]) -> Vec { + fn recurse(blocks: &[Block], reachable: &mut [bool], block: usize) { if !reachable[block] { reachable[block] = true; - for dest_id in outgoing_edges(blocks[block]) { - recurse( - blocks, - reachable, - blocks.get_index_of(&LabelId(dest_id)).unwrap(), - ); + for dest_id in outgoing_edges(&blocks[block]) { + let dest_idx = label_to_index(blocks, dest_id); + recurse(blocks, reachable, dest_idx); } } } @@ -75,19 +67,17 @@ fn compute_reachable(blocks: &FxIndexMap) -> Vec { reachable } -fn compute_preds( - blocks: &FxIndexMap, - reachable_blocks: &[bool], -) -> Vec> { +fn compute_preds(blocks: &[Block], reachable_blocks: &[bool]) -> Vec> { let mut result = vec![vec![]; blocks.len()]; // Do not count unreachable blocks as valid preds of blocks for (source_idx, source) in blocks - .values() + .iter() .enumerate() .filter(|&(b, _)| reachable_blocks[b]) { for dest_id in outgoing_edges(source) { - result[blocks.get_index_of(&LabelId(dest_id)).unwrap()].push(source_idx); + let dest_idx = label_to_index(blocks, dest_id); + result[dest_idx].push(source_idx); } } result @@ -171,7 +161,7 @@ fn insert_phis_all( types_global_values: &mut Vec, pointer_to_pointee: &FxHashMap, constants: &FxHashMap, - blocks: &mut FxIndexMap, + blocks: &mut [Block], dominance_frontier: &[FxHashSet], ) -> bool { let var_maps_and_types = blocks[0] @@ -208,11 +198,7 @@ fn insert_phis_all( rewrite_rules: FxHashMap::default(), }; renamer.rename(0, None); - // FIXME(eddyb) shouldn't this full rescan of the function be done once? - apply_rewrite_rules( - &renamer.rewrite_rules, - blocks.values_mut().map(|block| &mut **block), - ); + apply_rewrite_rules(&renamer.rewrite_rules, blocks); remove_nops(blocks); } remove_old_variables(blocks, &var_maps_and_types); @@ -230,7 +216,7 @@ struct VarInfo { fn collect_access_chains( pointer_to_pointee: &FxHashMap, constants: &FxHashMap, - blocks: &FxIndexMap, + blocks: &[Block], base_var: Word, base_var_ty: Word, ) -> Option> { @@ -263,7 +249,7 @@ fn collect_access_chains( // Loop in case a previous block references a later AccessChain loop { let mut changed = false; - for inst in blocks.values().flat_map(|b| &b.instructions) { + for inst in blocks.iter().flat_map(|b| &b.instructions) { for (index, op) in inst.operands.iter().enumerate() { if let Operand::IdRef(id) = op && variables.contains_key(id) @@ -317,10 +303,10 @@ fn collect_access_chains( // same var map (e.g. `s.x = s.y;`). 
fn split_copy_memory( header: &mut ModuleHeader, - blocks: &mut FxIndexMap, + blocks: &mut [Block], var_map: &FxHashMap, ) { - for block in blocks.values_mut() { + for block in blocks { let mut inst_index = 0; while inst_index < block.instructions.len() { let inst = &block.instructions[inst_index]; @@ -379,7 +365,7 @@ fn has_store(block: &Block, var_map: &FxHashMap) -> bool { } fn insert_phis( - blocks: &FxIndexMap, + blocks: &[Block], dominance_frontier: &[FxHashSet], var_map: &FxHashMap, ) -> FxHashSet { @@ -388,7 +374,7 @@ fn insert_phis( let mut ever_on_work_list = FxHashSet::default(); let mut work_list = Vec::new(); let mut blocks_with_phi = FxHashSet::default(); - for (block_idx, block) in blocks.values().enumerate() { + for (block_idx, block) in blocks.iter().enumerate() { if has_store(block, var_map) { ever_on_work_list.insert(block_idx); work_list.push(block_idx); @@ -433,10 +419,10 @@ fn top_stack_or_undef( } } -struct Renamer<'a, 'b> { +struct Renamer<'a> { header: &'a mut ModuleHeader, types_global_values: &'a mut Vec, - blocks: &'a mut FxIndexMap, + blocks: &'a mut [Block], blocks_with_phi: FxHashSet, base_var_type: Word, var_map: &'a FxHashMap, @@ -446,7 +432,7 @@ struct Renamer<'a, 'b> { rewrite_rules: FxHashMap, } -impl Renamer<'_, '_> { +impl Renamer<'_> { // Returns the phi definition. fn insert_phi_value(&mut self, block: usize, from_block: usize) -> Word { let from_block_label = self.blocks[from_block].label_id().unwrap(); @@ -568,8 +554,9 @@ impl Renamer<'_, '_> { } } - for dest_id in outgoing_edges(self.blocks[block]).collect::>() { - let dest_idx = self.blocks.get_index_of(&LabelId(dest_id)).unwrap(); + for dest_id in outgoing_edges(&self.blocks[block]).collect::>() { + // TODO: Don't do this find + let dest_idx = label_to_index(self.blocks, dest_id); self.rename(dest_idx, Some(block)); } @@ -579,8 +566,8 @@ impl Renamer<'_, '_> { } } -fn remove_nops(blocks: &mut FxIndexMap) { - for block in blocks.values_mut() { +fn remove_nops(blocks: &mut [Block]) { + for block in blocks { block .instructions .retain(|inst| inst.class.opcode != Op::Nop); @@ -588,7 +575,7 @@ fn remove_nops(blocks: &mut FxIndexMap) { } fn remove_old_variables( - blocks: &mut FxIndexMap, + blocks: &mut [Block], var_maps_and_types: &[(FxHashMap, u32)], ) { blocks[0].instructions.retain(|inst| { @@ -599,7 +586,7 @@ fn remove_old_variables( .all(|(var_map, _)| !var_map.contains_key(&result_id)) } }); - for block in blocks.values_mut() { + for block in blocks { block.instructions.retain(|inst| { !matches!(inst.class.opcode, Op::AccessChain | Op::InBoundsAccessChain) || inst.operands.iter().all(|op| { diff --git a/crates/rustc_codegen_spirv/src/linker/mod.rs b/crates/rustc_codegen_spirv/src/linker/mod.rs index e0d612cab2..e6c5bf3552 100644 --- a/crates/rustc_codegen_spirv/src/linker/mod.rs +++ b/crates/rustc_codegen_spirv/src/linker/mod.rs @@ -91,12 +91,9 @@ fn id(header: &mut ModuleHeader) -> Word { result } -fn apply_rewrite_rules<'a>( - rewrite_rules: &FxHashMap, - blocks: impl IntoIterator, -) { +fn apply_rewrite_rules(rewrite_rules: &FxHashMap, blocks: &mut [Block]) { let all_ids_mut = blocks - .into_iter() + .iter_mut() .flat_map(|b| b.label.iter_mut().chain(b.instructions.iter_mut())) .flat_map(|inst| { inst.result_id From 2ce3487a03b4f5b2df5987c87a62040970e95fb9 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 06/23] Revert "WIP: couple of inliner things that need to be disentangled" This reverts commit 
ea20ef36a7348084a72e8553150f26df16af4361. --- .../rustc_codegen_spirv/src/linker/inline.rs | 297 ++++++++++-------- .../ui/lang/consts/nested-ref.stderr | 20 -- .../core/ref/member_ref_arg-broken.stderr | 38 +-- .../ui/lang/core/ref/member_ref_arg.stderr | 20 -- .../ui/lang/panic/track_caller.stderr | 11 - 5 files changed, 166 insertions(+), 220 deletions(-) delete mode 100644 tests/compiletests/ui/lang/consts/nested-ref.stderr delete mode 100644 tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr delete mode 100644 tests/compiletests/ui/lang/panic/track_caller.stderr diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index e2aac8c548..432056c935 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -8,15 +8,13 @@ use super::apply_rewrite_rules; use super::ipo::CallGraph; use super::simple_passes::outgoing_edges; use super::{get_name, get_names}; -use crate::custom_decorations::SpanRegenerator; use crate::custom_insts::{self, CustomInst, CustomOp}; use rspirv::dr::{Block, Function, Instruction, Module, ModuleHeader, Operand}; use rspirv::spirv::{FunctionControl, Op, StorageClass, Word}; -use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap, FxIndexSet}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_errors::ErrorGuaranteed; use rustc_session::Session; use smallvec::SmallVec; -use std::cmp::Ordering; use std::mem; // FIXME(eddyb) this is a bit silly, but this keeps being repeated everywhere. @@ -42,10 +40,40 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { }) .map(|inst| inst.result_id.unwrap()); + /* + // Drop all the functions we'll be inlining. (This also means we won't waste time processing + // inlines in functions that will get inlined) + let mut dropped_ids = FxHashSet::default(); + let mut inlined_to_legalize_dont_inlines = Vec::new(); + module.functions.retain(|f| { + let should_inline_f = should_inline(&legal_globals, &functions_that_may_abort, f, None); + if should_inline_f != Ok(false) { + if should_inline_f == Err(MustInlineToLegalize) && has_dont_inline(f) { + inlined_to_legalize_dont_inlines.push(f.def_id().unwrap()); + } + // TODO: We should insert all defined IDs in this function. + dropped_ids.insert(f.def_id().unwrap()); + false + } else { + true + } + }); + + if !inlined_to_legalize_dont_inlines.is_empty() { + let names = get_names(module); + for f in inlined_to_legalize_dont_inlines { + sess.dcx().warn(format!( + "`#[inline(never)]` function `{}` needs to be inlined \ + because it has illegal argument or return types", + get_name(&names, f) + )); + } + } + */ + let legal_globals = LegalGlobal::gather_from_module(module); let header = module.header.as_mut().unwrap(); - // FIXME(eddyb) clippy false positive (separate `map` required for borrowck). #[allow(clippy::map_unwrap_or)] let mut inliner = Inliner { @@ -125,8 +153,6 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { .then_some(func.def_id().unwrap()) }) .collect(), - - inlined_dont_inlines_to_cause_and_callers: FxIndexMap::default(), }; let mut functions: Vec<_> = mem::take(&mut module.functions) @@ -145,52 +171,14 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { module.functions = functions.into_iter().map(|func| func.unwrap()).collect(); - let Inliner { - id_to_name, - inlined_dont_inlines_to_cause_and_callers, - .. 
- } = inliner; - - let mut span_regen = SpanRegenerator::new(sess.source_map(), module); - for (callee_id, (cause, callers)) in inlined_dont_inlines_to_cause_and_callers { - let callee_name = get_name(&id_to_name, callee_id); - - // HACK(eddyb) `libcore` hides panics behind `#[inline(never)]` `fn`s, - // making this too noisy and useless (since it's an impl detail). - if cause == "panicking" && callee_name.starts_with("core::") { - continue; - } - - let callee_span = span_regen - .src_loc_for_id(callee_id) - .and_then(|src_loc| span_regen.src_loc_to_rustc(src_loc)) - .unwrap_or_default(); - sess.dcx() - .struct_span_warn( - callee_span, - format!("`#[inline(never)]` function `{callee_name}` has been inlined"), - ) - .with_note(format!("inlining was required due to {cause}")) - .with_note(format!( - "called from {}", - callers - .iter() - .enumerate() - .filter_map(|(i, &caller_id)| { - // HACK(eddyb) avoid showing too many names. - match i.cmp(&4) { - Ordering::Less => { - Some(format!("`{}`", get_name(&id_to_name, caller_id))) - } - Ordering::Equal => Some(format!("and {} more", callers.len() - i)), - Ordering::Greater => None, - } - }) - .collect::>() - .join(", ") - )) - .emit(); - } + /* + // Drop OpName etc. for inlined functions + module.debug_names.retain(|inst| { + !inst + .operands + .iter() + .any(|op| op.id_ref_any().is_some_and(|id| dropped_ids.contains(&id))) + });*/ Ok(()) } @@ -373,42 +361,42 @@ fn has_dont_inline(function: &Function) -> bool { /// Helper error type for `should_inline` (see its doc comment). #[derive(Copy, Clone, PartialEq, Eq)] -struct MustInlineToLegalize(&'static str); +struct MustInlineToLegalize; -/// Returns `Ok(true)`/`Err(MustInlineToLegalize(_))` if `callee` should/must be +/// Returns `Ok(true)`/`Err(MustInlineToLegalize)` if `callee` should/must be /// inlined (either in general, or specifically from `call_site`, if provided). /// -/// The distinction made here is that `Err(MustInlineToLegalize(cause))` is -/// very much *not* a heuristic, and inlining is *mandatory* due to `cause` -/// (usually illegal signature/arguments, but also the panicking mechanism). -// -// FIXME(eddyb) the causes here are not fine-grained enough. +/// The distinction made is that `Err(MustInlineToLegalize)` is not a heuristic, +/// and inlining is *mandatory* due to an illegal signature/arguments. fn should_inline( legal_globals: &FxHashMap, functions_that_may_abort: &FxHashSet, callee: &Function, - call_site: CallSite<'_>, + call_site: Option>, ) -> Result { let callee_def = callee.def.as_ref().unwrap(); let callee_control = callee_def.operands[0].unwrap_function_control(); - if functions_that_may_abort.contains(&callee.def_id().unwrap()) { - return Err(MustInlineToLegalize("panicking")); + // HACK(eddyb) this "has a call-site" check ensures entry-points don't get + // accidentally removed as "must inline to legalize" function, but can still + // be inlined into other entry-points (if such an unusual situation arises). 
+ if call_site.is_some() && functions_that_may_abort.contains(&callee.def_id().unwrap()) { + return Err(MustInlineToLegalize); } let ret_ty = legal_globals .get(&callee_def.result_type.unwrap()) - .ok_or(MustInlineToLegalize("illegal return type"))?; + .ok_or(MustInlineToLegalize)?; if !ret_ty.legal_as_fn_ret_ty() { - return Err(MustInlineToLegalize("illegal (pointer) return type")); + return Err(MustInlineToLegalize); } for (i, param) in callee.parameters.iter().enumerate() { let param_ty = legal_globals .get(param.result_type.as_ref().unwrap()) - .ok_or(MustInlineToLegalize("illegal parameter type"))?; + .ok_or(MustInlineToLegalize)?; if !param_ty.legal_as_fn_param_ty() { - return Err(MustInlineToLegalize("illegal (pointer) parameter type")); + return Err(MustInlineToLegalize); } // If the call isn't passing a legal pointer argument (a "memory object", @@ -416,13 +404,13 @@ fn should_inline( // then inlining is required to have a chance at producing legal SPIR-V. // // FIXME(eddyb) rewriting away the pointer could be another alternative. - if let LegalGlobal::TypePointer(_) = param_ty { + if let (LegalGlobal::TypePointer(_), Some(call_site)) = (param_ty, call_site) { let ptr_arg = call_site.call_inst.operands[i + 1].unwrap_id_ref(); match legal_globals.get(&ptr_arg) { Some(LegalGlobal::Variable) => {} // FIXME(eddyb) should some constants (undef/null) be allowed? - Some(_) => return Err(MustInlineToLegalize("illegal (pointer) argument")), + Some(_) => return Err(MustInlineToLegalize), None => { let mut caller_param_and_var_ids = call_site @@ -448,7 +436,7 @@ fn should_inline( .map(|caller_inst| caller_inst.result_id.unwrap()); if !caller_param_and_var_ids.any(|id| ptr_arg == id) { - return Err(MustInlineToLegalize("illegal (pointer) argument")); + return Err(MustInlineToLegalize); } } } @@ -469,7 +457,7 @@ struct FuncIsBeingInlined; // Renumber IDs // Insert blocks -struct Inliner<'a, 'b> { +struct Inliner<'m> { /// ID of `OpExtInstImport` for our custom "extended instruction set" /// (see `crate::custom_insts` for more details). custom_ext_inst_set_import: Word, @@ -481,27 +469,26 @@ struct Inliner<'a, 'b> { /// Pre-collected `OpName`s, that can be used to find any function's name /// during inlining (to be able to generate debuginfo that uses names). - id_to_name: FxHashMap, + id_to_name: FxHashMap, /// `OpString` cache (for deduplicating `OpString`s for the same string). // // FIXME(eddyb) currently this doesn't reuse existing `OpString`s, but since // this is mostly for inlined callee names, it's expected almost no overlap // exists between existing `OpString`s and new ones, anyway. 
- cached_op_strings: FxHashMap<&'a str, Word>, + cached_op_strings: FxHashMap<&'m str, Word>, - header: &'b mut ModuleHeader, - debug_string_source: &'b mut Vec, - annotations: &'b mut Vec, - types_global_values: &'b mut Vec, + header: &'m mut ModuleHeader, + debug_string_source: &'m mut Vec, + annotations: &'m mut Vec, + types_global_values: &'m mut Vec, legal_globals: FxHashMap, functions_that_may_abort: FxHashSet, - inlined_dont_inlines_to_cause_and_callers: FxIndexMap)>, // rewrite_rules: FxHashMap, } -impl Inliner<'_, '_> { +impl Inliner<'_> { fn id(&mut self) -> Word { next_id(self.header) } @@ -593,19 +580,10 @@ impl Inliner<'_, '_> { &self.legal_globals, &self.functions_that_may_abort, f, - call_site, + Some(call_site), ) { Ok(inline) => inline, - Err(MustInlineToLegalize(cause)) => { - if has_dont_inline(f) { - self.inlined_dont_inlines_to_cause_and_callers - .entry(f.def_id().unwrap()) - .or_insert_with(|| (cause, Default::default())) - .1 - .insert(caller.def_id().unwrap()); - } - true - } + Err(MustInlineToLegalize) => true, } }); let (call_index, call_inst, callee) = match call { @@ -632,28 +610,18 @@ impl Inliner<'_, '_> { }; let call_result_id = call_inst.result_id.unwrap(); - // Get the debug "source location" instruction that applies to the call. + // Get the debuginfo instructions that apply to the call. + // TODO(eddyb) only one instruction should be necessary here w/ bottom-up. let custom_ext_inst_set_import = self.custom_ext_inst_set_import; - let call_debug_src_loc_inst = caller.blocks[block_idx].instructions[..call_index] + let call_debug_insts = caller.blocks[block_idx].instructions[..call_index] .iter() - .rev() - .find_map(|inst| { - Some(match inst.class.opcode { - Op::Line => Some(inst), - Op::NoLine => None, - Op::ExtInst - if inst.operands[0].unwrap_id_ref() == custom_ext_inst_set_import => - { - match CustomOp::decode_from_ext_inst(inst) { - CustomOp::SetDebugSrcLoc => Some(inst), - CustomOp::ClearDebugSrcLoc => None, - _ => return None, - } - } - _ => return None, - }) - }) - .flatten(); + .filter(|inst| match inst.class.opcode { + Op::Line | Op::NoLine => true, + Op::ExtInst if inst.operands[0].unwrap_id_ref() == custom_ext_inst_set_import => { + CustomOp::decode_from_ext_inst(inst).is_debuginfo() + } + _ => false, + }); // Rewrite parameters to arguments let call_arguments = call_inst @@ -674,12 +642,9 @@ impl Inliner<'_, '_> { }; let return_jump = self.id(); // Rewrite OpReturns of the callee. - let mut inlined_callee_blocks = self.get_inlined_blocks( - callee, - call_debug_src_loc_inst, - return_variable, - return_jump, - ); + #[allow(clippy::needless_borrow)] + let (mut inlined_callee_blocks, extra_debug_insts_pre_call, extra_debug_insts_post_call) = + self.get_inlined_blocks(&callee, call_debug_insts, return_variable, return_jump); // Clone the IDs of the callee, because otherwise they'd be defined multiple times if the // fn is inlined multiple times. self.add_clone_id_rules(&mut rewrite_rules, &inlined_callee_blocks); @@ -701,6 +666,13 @@ impl Inliner<'_, '_> { .unwrap(); assert!(call.class.opcode == Op::FunctionCall); + // HACK(eddyb) inject the additional debuginfo instructions generated by + // `get_inlined_blocks`, so the inlined call frame "stack" isn't corrupted. 
+ caller.blocks[pre_call_block_idx] + .instructions + .extend(extra_debug_insts_pre_call); + post_call_block_insts.splice(0..0, extra_debug_insts_post_call); + if let Some(call_result_type) = call_result_type { // Generate the storage space for the return value: Do this *after* the split above, // because if block_idx=0, inserting a variable here shifts call_index. @@ -895,19 +867,59 @@ impl Inliner<'_, '_> { } } - fn get_inlined_blocks( + // HACK(eddyb) the second and third return values are additional debuginfo + // instructions that need to be inserted just before/after the callsite. + fn get_inlined_blocks<'a>( &mut self, callee: &Function, - call_debug_src_loc_inst: Option<&Instruction>, + call_debug_insts: impl Iterator, return_variable: Option, return_jump: Word, - ) -> Vec { + ) -> ( + Vec, + SmallVec<[Instruction; 8]>, + SmallVec<[Instruction; 8]>, + ) { let Self { custom_ext_inst_set_import, op_type_void_id, .. } = *self; + // TODO(eddyb) kill this as it shouldn't be needed for bottom-up inline. + // HACK(eddyb) this is terrible, but we have to deal with it because of + // how this inliner is outside-in, instead of inside-out, meaning that + // context builds up "outside" of the callee blocks, inside the caller. + let mut enclosing_inlined_frames = SmallVec::<[_; 8]>::new(); + let mut current_debug_src_loc_inst = None; + for inst in call_debug_insts { + match inst.class.opcode { + Op::Line => current_debug_src_loc_inst = Some(inst), + Op::NoLine => current_debug_src_loc_inst = None, + Op::ExtInst + if inst.operands[0].unwrap_id_ref() == self.custom_ext_inst_set_import => + { + match CustomOp::decode_from_ext_inst(inst) { + CustomOp::SetDebugSrcLoc => current_debug_src_loc_inst = Some(inst), + CustomOp::ClearDebugSrcLoc => current_debug_src_loc_inst = None, + CustomOp::PushInlinedCallFrame => { + enclosing_inlined_frames + .push((current_debug_src_loc_inst.take(), inst)); + } + CustomOp::PopInlinedCallFrame => { + if let Some((callsite_debug_src_loc_inst, _)) = + enclosing_inlined_frames.pop() + { + current_debug_src_loc_inst = callsite_debug_src_loc_inst; + } + } + CustomOp::Abort => {} + } + } + _ => {} + } + } + // Prepare the debuginfo insts to prepend/append to every block. // FIXME(eddyb) this could be more efficient if we only used one pair of // `{Push,Pop}InlinedCallFrame` for the whole inlined callee, but there @@ -932,7 +944,7 @@ impl Inliner<'_, '_> { )); id }); - let mut mk_debuginfo_prefix_and_suffix = || { + let mut mk_debuginfo_prefix_and_suffix = |include_callee_frame| { // NOTE(eddyb) `OpExtInst`s have a result ID, even if unused, and // it has to be unique (same goes for the other instructions below). let instantiate_debuginfo = |this: &mut Self, inst: &Instruction| { @@ -956,18 +968,33 @@ impl Inliner<'_, '_> { .collect(), ) }; + // FIXME(eddyb) this only allocates to avoid borrow conflicts. 
+ let mut prefix = SmallVec::<[_; 8]>::new(); + let mut suffix = SmallVec::<[_; 8]>::new(); + for &(callsite_debug_src_loc_inst, push_inlined_call_frame_inst) in + &enclosing_inlined_frames + { + prefix.extend( + callsite_debug_src_loc_inst + .into_iter() + .chain([push_inlined_call_frame_inst]) + .map(|inst| instantiate_debuginfo(self, inst)), + ); + suffix.push(custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)); + } + prefix.extend(current_debug_src_loc_inst.map(|inst| instantiate_debuginfo(self, inst))); + + if include_callee_frame { + prefix.push(custom_inst_to_inst( + self, + CustomInst::PushInlinedCallFrame { + callee_name: Operand::IdRef(callee_name_id), + }, + )); + suffix.push(custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)); + } - ( - (call_debug_src_loc_inst.map(|inst| instantiate_debuginfo(self, inst))) - .into_iter() - .chain([custom_inst_to_inst( - self, - CustomInst::PushInlinedCallFrame { - callee_name: Operand::IdRef(callee_name_id), - }, - )]), - [custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)], - ) + (prefix, suffix) }; let mut blocks = callee.blocks.clone(); @@ -1021,7 +1048,7 @@ impl Inliner<'_, '_> { // HACK(eddyb) avoid adding debuginfo to otherwise-empty blocks. if block.instructions.len() > num_phis { - let (debuginfo_prefix, debuginfo_suffix) = mk_debuginfo_prefix_and_suffix(); + let (debuginfo_prefix, debuginfo_suffix) = mk_debuginfo_prefix_and_suffix(true); // Insert the prefix debuginfo instructions after `OpPhi`s, // which sadly can't be covered by them. block @@ -1035,7 +1062,13 @@ impl Inliner<'_, '_> { block.instructions.push(terminator); } - blocks + let (caller_restore_debuginfo_after_call, calleer_reset_debuginfo_before_call) = + mk_debuginfo_prefix_and_suffix(false); + ( + blocks, + calleer_reset_debuginfo_before_call, + caller_restore_debuginfo_after_call, + ) } fn insert_opvariables(&self, block: &mut Block, insts: impl IntoIterator) { diff --git a/tests/compiletests/ui/lang/consts/nested-ref.stderr b/tests/compiletests/ui/lang/consts/nested-ref.stderr deleted file mode 100644 index 805ec9b607..0000000000 --- a/tests/compiletests/ui/lang/consts/nested-ref.stderr +++ /dev/null @@ -1,20 +0,0 @@ -warning: `#[inline(never)]` function `nested_ref::deep_load` has been inlined - --> $DIR/nested-ref.rs:12:4 - | -LL | fn deep_load(r: &'static &'static u32) -> u32 { - | ^^^^^^^^^ - | - = note: inlining was required due to illegal parameter type - = note: called from `nested_ref::main` - -warning: `#[inline(never)]` function `nested_ref::deep_transpose` has been inlined - --> $DIR/nested-ref.rs:19:4 - | -LL | fn deep_transpose(r: &'static &'static Mat2) -> Mat2 { - | ^^^^^^^^^^^^^^ - | - = note: inlining was required due to illegal parameter type - = note: called from `nested_ref::main` - -warning: 2 warnings emitted - diff --git a/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr b/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr index 4a41bfb462..f86e4101d8 100644 --- a/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr +++ b/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr @@ -1,44 +1,8 @@ -warning: `#[inline(never)]` function `member_ref_arg_broken::f` has been inlined - --> $DIR/member_ref_arg-broken.rs:20:4 - | -LL | fn f(x: &u32) -> u32 { - | ^ - | - = note: inlining was required due to illegal (pointer) argument - = note: called from `member_ref_arg_broken::main` - -warning: `#[inline(never)]` function `member_ref_arg_broken::g` has been inlined - --> 
$DIR/member_ref_arg-broken.rs:25:4 - | -LL | fn g(xy: (&u32, &u32)) -> (u32, u32) { - | ^ - | - = note: inlining was required due to illegal (pointer) argument - = note: called from `member_ref_arg_broken::main` - -warning: `#[inline(never)]` function `member_ref_arg_broken::h` has been inlined - --> $DIR/member_ref_arg-broken.rs:30:4 - | -LL | fn h(xyz: (&u32, &u32, &u32)) -> (u32, u32, u32) { - | ^ - | - = note: inlining was required due to illegal parameter type - = note: called from `member_ref_arg_broken::main` - -warning: `#[inline(never)]` function `member_ref_arg_broken::h_newtyped` has been inlined - --> $DIR/member_ref_arg-broken.rs:41:4 - | -LL | fn h_newtyped(xyz: ((&u32, &u32, &u32),)) -> (u32, u32, u32) { - | ^^^^^^^^^^ - | - = note: inlining was required due to illegal parameter type - = note: called from `member_ref_arg_broken::main` - error: error:0:0 - OpLoad Pointer '$ID[%$ID]' is not a logical pointer. %39 = OpLoad %uint %38 | = note: spirv-val failed = note: module `$TEST_BUILD_DIR/lang/core/ref/member_ref_arg-broken` -error: aborting due to 1 previous error; 4 warnings emitted +error: aborting due to 1 previous error diff --git a/tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr b/tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr deleted file mode 100644 index 5ec9e260b4..0000000000 --- a/tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr +++ /dev/null @@ -1,20 +0,0 @@ -warning: `#[inline(never)]` function `member_ref_arg::f` has been inlined - --> $DIR/member_ref_arg.rs:14:4 - | -LL | fn f(x: &u32) {} - | ^ - | - = note: inlining was required due to illegal (pointer) argument - = note: called from `member_ref_arg::main` - -warning: `#[inline(never)]` function `member_ref_arg::g` has been inlined - --> $DIR/member_ref_arg.rs:17:4 - | -LL | fn g(xy: (&u32, &u32)) {} - | ^ - | - = note: inlining was required due to illegal (pointer) argument - = note: called from `member_ref_arg::main` - -warning: 2 warnings emitted - diff --git a/tests/compiletests/ui/lang/panic/track_caller.stderr b/tests/compiletests/ui/lang/panic/track_caller.stderr deleted file mode 100644 index b18b2fed0d..0000000000 --- a/tests/compiletests/ui/lang/panic/track_caller.stderr +++ /dev/null @@ -1,11 +0,0 @@ -warning: `#[inline(never)]` function `track_caller::track_caller_maybe_panic::panic_cold_explicit` has been inlined - --> $DIR/track_caller.rs:10:9 - | -LL | panic!(); - | ^^^^^^^^ - | - = note: inlining was required due to panicking - = note: called from `track_caller::track_caller_maybe_panic` - -warning: 1 warning emitted - From f4d690b554e5797f0e6f4d1ac139956aaf6b0c45 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 07/23] Revert "WIP: (TODO: finish bottom-up cleanups) bottom-up inlining" This reverts commit 41ec7ea82b5bdae99879a2aa9715dc73e302c741. 
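Reverting to the outside-in (top-down) inliner means the set of functions that may `Abort` has to be made transitive up front again: the restored code below collects the `CallGraph` and walks it in post-order, so every callee is classified before any of its callers. A minimal standalone sketch of that propagation, with plain `std` collections and hypothetical parameter names in place of the crate's `CallGraph`/`FxHashSet` types:

```rust
use std::collections::HashSet;

/// Bottom-up "may abort" propagation: because `post_order` lists callees
/// before their callers, a single pass over it makes the set transitive.
fn functions_that_may_abort(
    callees: &[Vec<usize>],   // callees[f] = indices of the functions `f` calls
    aborts_directly: &[bool], // per function: ends some block with a custom `Abort`
    post_order: &[usize],     // every callee index appears before its callers
) -> HashSet<usize> {
    let mut may_abort = HashSet::new();
    for &f in post_order {
        if aborts_directly[f] || callees[f].iter().any(|c| may_abort.contains(c)) {
            may_abort.insert(f);
        }
    }
    may_abort
}
```

The same post-order previously drove the bottom-up inlining loop itself; after this revert it is kept only for this abort analysis, while inlining goes back to iterating `module.functions` in place.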
--- .../rustc_codegen_spirv/src/linker/inline.rs | 173 ++++++++---------- crates/rustc_codegen_spirv/src/linker/ipo.rs | 16 +- .../ui/dis/panic_builtin_bounds_check.stderr | 3 +- .../ui/lang/consts/nested-ref.stderr | 6 + .../core/ref/member_ref_arg-broken.stderr | 6 +- 5 files changed, 95 insertions(+), 109 deletions(-) create mode 100644 tests/compiletests/ui/lang/consts/nested-ref.stderr diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 432056c935..1e8ddbc5b5 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -17,6 +17,8 @@ use rustc_session::Session; use smallvec::SmallVec; use std::mem; +type FunctionMap = FxHashMap; + // FIXME(eddyb) this is a bit silly, but this keeps being repeated everywhere. fn next_id(header: &mut ModuleHeader) -> Word { let result = header.bound; @@ -28,9 +30,6 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { // This algorithm gets real sad if there's recursion - but, good news, SPIR-V bans recursion deny_recursion_in_module(sess, module)?; - // Compute the call-graph that will drive (inside-out, aka bottom-up) inlining. - let (call_graph, func_id_to_idx) = CallGraph::collect_with_func_id_to_idx(module); - let custom_ext_inst_set_import = module .ext_inst_imports .iter() @@ -40,7 +39,62 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { }) .map(|inst| inst.result_id.unwrap()); - /* + // HACK(eddyb) compute the set of functions that may `Abort` *transitively*, + // which is only needed because of how we inline (sometimes it's outside-in, + // aka top-down, instead of always being inside-out, aka bottom-up). + // + // (inlining is needed in the first place because our custom `Abort` + // instructions get lowered to a simple `OpReturn` in entry-points, but + // that requires that they get inlined all the way up to the entry-points) + let functions_that_may_abort = custom_ext_inst_set_import + .map(|custom_ext_inst_set_import| { + let mut may_abort_by_id = FxHashSet::default(); + + // FIXME(eddyb) use this `CallGraph` abstraction more during inlining. + let call_graph = CallGraph::collect(module); + for func_idx in call_graph.post_order() { + let func_id = module.functions[func_idx].def_id().unwrap(); + + let any_callee_may_abort = call_graph.callees[func_idx].iter().any(|&callee_idx| { + may_abort_by_id.contains(&module.functions[callee_idx].def_id().unwrap()) + }); + if any_callee_may_abort { + may_abort_by_id.insert(func_id); + continue; + } + + let may_abort_directly = module.functions[func_idx].blocks.iter().any(|block| { + match &block.instructions[..] { + [.., last_normal_inst, terminator_inst] + if last_normal_inst.class.opcode == Op::ExtInst + && last_normal_inst.operands[0].unwrap_id_ref() + == custom_ext_inst_set_import + && CustomOp::decode_from_ext_inst(last_normal_inst) + == CustomOp::Abort => + { + assert_eq!(terminator_inst.class.opcode, Op::Unreachable); + true + } + + _ => false, + } + }); + if may_abort_directly { + may_abort_by_id.insert(func_id); + } + } + + may_abort_by_id + }) + .unwrap_or_default(); + + let functions = module + .functions + .iter() + .map(|f| (f.def_id().unwrap(), f.clone())) + .collect(); + let legal_globals = LegalGlobal::gather_from_module(module); + // Drop all the functions we'll be inlining. 
(This also means we won't waste time processing // inlines in functions that will get inlined) let mut dropped_ids = FxHashSet::default(); @@ -69,9 +123,6 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { )); } } - */ - - let legal_globals = LegalGlobal::gather_from_module(module); let header = module.header.as_mut().unwrap(); // FIXME(eddyb) clippy false positive (separate `map` required for borrowck). @@ -103,8 +154,6 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { id }), - func_id_to_idx, - id_to_name: module .debug_names .iter() @@ -124,61 +173,22 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { annotations: &mut module.annotations, types_global_values: &mut module.types_global_values, - legal_globals, - - // NOTE(eddyb) this is needed because our custom `Abort` instructions get - // lowered to a simple `OpReturn` in entry-points, but that requires that - // they get inlined all the way up to the entry-points in the first place. - functions_that_may_abort: module - .functions - .iter() - .filter_map(|func| { - let custom_ext_inst_set_import = custom_ext_inst_set_import?; - func.blocks - .iter() - .any(|block| match &block.instructions[..] { - [.., last_normal_inst, terminator_inst] - if last_normal_inst.class.opcode == Op::ExtInst - && last_normal_inst.operands[0].unwrap_id_ref() - == custom_ext_inst_set_import - && CustomOp::decode_from_ext_inst(last_normal_inst) - == CustomOp::Abort => - { - assert_eq!(terminator_inst.class.opcode, Op::Unreachable); - true - } - - _ => false, - }) - .then_some(func.def_id().unwrap()) - }) - .collect(), + functions: &functions, + legal_globals: &legal_globals, + functions_that_may_abort: &functions_that_may_abort, }; - - let mut functions: Vec<_> = mem::take(&mut module.functions) - .into_iter() - .map(Ok) - .collect(); - - // Inline functions in post-order (aka inside-out aka bottom-out) - that is, - // callees are processed before their callers, to avoid duplicating work. - for func_idx in call_graph.post_order() { - let mut function = mem::replace(&mut functions[func_idx], Err(FuncIsBeingInlined)).unwrap(); - inliner.inline_fn(&mut function, &functions); - fuse_trivial_branches(&mut function); - functions[func_idx] = Ok(function); + for function in &mut module.functions { + inliner.inline_fn(function); + fuse_trivial_branches(function); } - module.functions = functions.into_iter().map(|func| func.unwrap()).collect(); - - /* // Drop OpName etc. for inlined functions module.debug_names.retain(|inst| { !inst .operands .iter() .any(|op| op.id_ref_any().is_some_and(|id| dropped_ids.contains(&id))) - });*/ + }); Ok(()) } @@ -446,27 +456,19 @@ fn should_inline( Ok(callee_control.contains(FunctionControl::INLINE)) } -/// Helper error type for `Inliner`'s `functions` field, indicating a `Function` -/// was taken out of its slot because it's being inlined. -#[derive(Debug)] -struct FuncIsBeingInlined; - // Steps: // Move OpVariable decls // Rewrite return // Renumber IDs // Insert blocks -struct Inliner<'m> { +struct Inliner<'m, 'map> { /// ID of `OpExtInstImport` for our custom "extended instruction set" /// (see `crate::custom_insts` for more details). custom_ext_inst_set_import: Word, op_type_void_id: Word, - /// Map from each function's ID to its index in `functions`. - func_id_to_idx: FxHashMap, - /// Pre-collected `OpName`s, that can be used to find any function's name /// during inlining (to be able to generate debuginfo that uses names). 
id_to_name: FxHashMap, @@ -483,12 +485,13 @@ struct Inliner<'m> { annotations: &'m mut Vec, types_global_values: &'m mut Vec, - legal_globals: FxHashMap, - functions_that_may_abort: FxHashSet, + functions: &'map FunctionMap, + legal_globals: &'map FxHashMap, + functions_that_may_abort: &'map FxHashSet, // rewrite_rules: FxHashMap, } -impl Inliner<'_> { +impl Inliner<'_, '_> { fn id(&mut self) -> Word { next_id(self.header) } @@ -533,29 +536,19 @@ impl Inliner<'_> { inst_id } - fn inline_fn( - &mut self, - function: &mut Function, - functions: &[Result], - ) { + fn inline_fn(&mut self, function: &mut Function) { let mut block_idx = 0; while block_idx < function.blocks.len() { // If we successfully inlined a block, then repeat processing on the same block, in // case the newly inlined block has more inlined calls. // TODO: This is quadratic - if !self.inline_block(function, block_idx, functions) { - // TODO(eddyb) skip past the inlined callee without rescanning it. + if !self.inline_block(function, block_idx) { block_idx += 1; } } } - fn inline_block( - &mut self, - caller: &mut Function, - block_idx: usize, - functions: &[Result], - ) -> bool { + fn inline_block(&mut self, caller: &mut Function, block_idx: usize) -> bool { // Find the first inlined OpFunctionCall let call = caller.blocks[block_idx] .instructions @@ -566,8 +559,8 @@ impl Inliner<'_> { ( index, inst, - functions[self.func_id_to_idx[&inst.operands[0].id_ref_any().unwrap()]] - .as_ref() + self.functions + .get(&inst.operands[0].id_ref_any().unwrap()) .unwrap(), ) }) @@ -577,8 +570,8 @@ impl Inliner<'_> { call_inst: inst, }; match should_inline( - &self.legal_globals, - &self.functions_that_may_abort, + self.legal_globals, + self.functions_that_may_abort, f, Some(call_site), ) { @@ -590,16 +583,6 @@ impl Inliner<'_> { None => return false, Some(call) => call, }; - - // Propagate "may abort" from callee to caller (i.e. as aborts get inlined). - if self - .functions_that_may_abort - .contains(&callee.def_id().unwrap()) - { - self.functions_that_may_abort - .insert(caller.def_id().unwrap()); - } - let call_result_type = { let ty = call_inst.result_type.unwrap(); if ty == self.op_type_void_id { @@ -611,7 +594,6 @@ impl Inliner<'_> { let call_result_id = call_inst.result_id.unwrap(); // Get the debuginfo instructions that apply to the call. - // TODO(eddyb) only one instruction should be necessary here w/ bottom-up. let custom_ext_inst_set_import = self.custom_ext_inst_set_import; let call_debug_insts = caller.blocks[block_idx].instructions[..call_index] .iter() @@ -886,7 +868,6 @@ impl Inliner<'_> { .. } = *self; - // TODO(eddyb) kill this as it shouldn't be needed for bottom-up inline. // HACK(eddyb) this is terrible, but we have to deal with it because of // how this inliner is outside-in, instead of inside-out, meaning that // context builds up "outside" of the callee blocks, inside the caller. diff --git a/crates/rustc_codegen_spirv/src/linker/ipo.rs b/crates/rustc_codegen_spirv/src/linker/ipo.rs index 96c7bbe6ae..475f5c50c1 100644 --- a/crates/rustc_codegen_spirv/src/linker/ipo.rs +++ b/crates/rustc_codegen_spirv/src/linker/ipo.rs @@ -4,7 +4,7 @@ use indexmap::IndexSet; use rspirv::dr::Module; -use rspirv::spirv::{Op, Word}; +use rspirv::spirv::Op; use rustc_data_structures::fx::FxHashMap; // FIXME(eddyb) use newtyped indices and `IndexVec`. 
@@ -19,9 +19,6 @@ pub struct CallGraph { impl CallGraph { pub fn collect(module: &Module) -> Self { - Self::collect_with_func_id_to_idx(module).0 - } - pub fn collect_with_func_id_to_idx(module: &Module) -> (Self, FxHashMap) { let func_id_to_idx: FxHashMap<_, _> = module .functions .iter() @@ -54,13 +51,10 @@ impl CallGraph { .collect() }) .collect(); - ( - Self { - entry_points, - callees, - }, - func_id_to_idx, - ) + Self { + entry_points, + callees, + } } /// Order functions using a post-order traversal, i.e. callees before callers. diff --git a/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr b/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr index a3bee642b1..5b2ad4eb9a 100644 --- a/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr +++ b/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr @@ -28,13 +28,14 @@ OpLine %5 32 4 %21 = OpVariable %12 Function OpLine %5 32 23 %22 = OpCompositeConstruct %6 %13 %14 %15 %16 -OpLine %5 27 4 +OpLine %5 32 4 %23 = OpCompositeExtract %9 %22 0 %24 = OpCompositeExtract %9 %22 1 %25 = OpCompositeExtract %9 %22 2 %26 = OpCompositeExtract %9 %22 3 %27 = OpCompositeConstruct %11 %23 %24 %25 %26 OpStore %21 %27 +OpLine %5 27 4 %28 = OpULessThan %17 %18 %10 OpNoLine OpSelectionMerge %29 None diff --git a/tests/compiletests/ui/lang/consts/nested-ref.stderr b/tests/compiletests/ui/lang/consts/nested-ref.stderr new file mode 100644 index 0000000000..692ea48ed7 --- /dev/null +++ b/tests/compiletests/ui/lang/consts/nested-ref.stderr @@ -0,0 +1,6 @@ +warning: `#[inline(never)]` function `nested_ref::deep_load` needs to be inlined because it has illegal argument or return types + +warning: `#[inline(never)]` function `nested_ref::deep_transpose` needs to be inlined because it has illegal argument or return types + +warning: 2 warnings emitted + diff --git a/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr b/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr index f86e4101d8..35f37f16a8 100644 --- a/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr +++ b/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr @@ -1,8 +1,12 @@ +warning: `#[inline(never)]` function `member_ref_arg_broken::h` needs to be inlined because it has illegal argument or return types + +warning: `#[inline(never)]` function `member_ref_arg_broken::h_newtyped` needs to be inlined because it has illegal argument or return types + error: error:0:0 - OpLoad Pointer '$ID[%$ID]' is not a logical pointer. %39 = OpLoad %uint %38 | = note: spirv-val failed = note: module `$TEST_BUILD_DIR/lang/core/ref/member_ref_arg-broken` -error: aborting due to 1 previous error +error: aborting due to 1 previous error; 2 warnings emitted From e796516a929a891af90141f90d5f7d0061a3eca0 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 08/23] Revert "linker/inline: fix `OpVariable` debuginfo collection and insertion." This reverts commit 355122de8b7b940d987eb499e11a74ca3fbe1dc3. 
--- .../rustc_codegen_spirv/src/linker/inline.rs | 202 +++++------------- 1 file changed, 52 insertions(+), 150 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 1e8ddbc5b5..a1892875ab 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -15,7 +15,7 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_errors::ErrorGuaranteed; use rustc_session::Session; use smallvec::SmallVec; -use std::mem; +use std::mem::{self, take}; type FunctionMap = FxHashMap; @@ -658,13 +658,15 @@ impl Inliner<'_, '_> { if let Some(call_result_type) = call_result_type { // Generate the storage space for the return value: Do this *after* the split above, // because if block_idx=0, inserting a variable here shifts call_index. - let ret_var_inst = Instruction::new( - Op::Variable, - Some(self.ptr_ty(call_result_type)), - Some(return_variable.unwrap()), - vec![Operand::StorageClass(StorageClass::Function)], + insert_opvariables( + &mut caller.blocks[0], + [Instruction::new( + Op::Variable, + Some(self.ptr_ty(call_result_type)), + Some(return_variable.unwrap()), + vec![Operand::StorageClass(StorageClass::Function)], + )], ); - self.insert_opvariables(&mut caller.blocks[0], [ret_var_inst]); } // Insert non-entry inlined callee blocks just after the pre-call block. @@ -710,115 +712,52 @@ impl Inliner<'_, '_> { // Fuse the inlined callee entry block into the pre-call block. // This is okay because it's illegal to branch to the first BB in a function. { - // NOTE(eddyb) `OpExtInst`s have a result ID, even if unused, and - // it has to be unique, so this allocates new IDs as-needed. - let instantiate_debuginfo = |this: &mut Self, inst: &Instruction| { - let mut inst = inst.clone(); - if let Some(id) = &mut inst.result_id { - *id = this.id(); - } - inst - }; - - let custom_inst_to_inst = |this: &mut Self, inst: CustomInst<_>| { - Instruction::new( - Op::ExtInst, - Some(this.op_type_void_id), - Some(this.id()), - [ - Operand::IdRef(this.custom_ext_inst_set_import), - Operand::LiteralExtInstInteger(inst.op() as u32), - ] - .into_iter() - .chain(inst.into_operands()) - .collect(), - ) - }; - // Return the subsequence of `insts` made from `OpVariable`s, and any // debuginfo instructions (which may apply to them), while removing // *only* `OpVariable`s from `insts` (and keeping debuginfo in both). let mut steal_vars = |insts: &mut Vec| { - // HACK(eddyb) this duplicates some code from `get_inlined_blocks`, - // but that will be removed once the inliner is refactored to be - // inside-out instead of outside-in (already finished in a branch). 
- let mut enclosing_inlined_frames = SmallVec::<[_; 8]>::new(); - let mut current_debug_src_loc_inst = None; - let mut vars_and_debuginfo_range = 0..0; - while vars_and_debuginfo_range.end < insts.len() { - let inst = &insts[vars_and_debuginfo_range.end]; - match inst.class.opcode { - Op::Line => current_debug_src_loc_inst = Some(inst), - Op::NoLine => current_debug_src_loc_inst = None, - Op::ExtInst - if inst.operands[0].unwrap_id_ref() - == self.custom_ext_inst_set_import => - { - match CustomOp::decode_from_ext_inst(inst) { - CustomOp::SetDebugSrcLoc => current_debug_src_loc_inst = Some(inst), - CustomOp::ClearDebugSrcLoc => current_debug_src_loc_inst = None, - CustomOp::PushInlinedCallFrame => { - enclosing_inlined_frames - .push((current_debug_src_loc_inst.take(), inst)); - } - CustomOp::PopInlinedCallFrame => { - if let Some((callsite_debug_src_loc_inst, _)) = - enclosing_inlined_frames.pop() - { - current_debug_src_loc_inst = callsite_debug_src_loc_inst; - } - } - CustomOp::Abort => break, - } + let mut vars_and_debuginfo = vec![]; + insts.retain_mut(|inst| { + let is_debuginfo = match inst.class.opcode { + Op::Line | Op::NoLine => true, + Op::ExtInst => { + inst.operands[0].unwrap_id_ref() == self.custom_ext_inst_set_import + && CustomOp::decode_from_ext_inst(inst).is_debuginfo() } - Op::Variable => {} - _ => break, + _ => false, + }; + if is_debuginfo { + // NOTE(eddyb) `OpExtInst`s have a result ID, + // even if unused, and it has to be unique. + let mut inst = inst.clone(); + if let Some(id) = &mut inst.result_id { + *id = self.id(); + } + vars_and_debuginfo.push(inst); + true + } else if inst.class.opcode == Op::Variable { + // HACK(eddyb) we're removing this `Instruction` from + // `inst`, so it doesn't really matter what we use here. + vars_and_debuginfo.push(mem::replace( + inst, + Instruction::new(Op::Nop, None, None, vec![]), + )); + false + } else { + true } - vars_and_debuginfo_range.end += 1; - } - - // `vars_and_debuginfo_range.end` indicates where `OpVariable`s - // end and other instructions start (module debuginfo), but to - // split the block in two, both sides of the "cut" need "repair": - // - the variables are missing "inlined call frames" pops, that - // may happen later in the block, and have to be synthesized - // - the non-variables are missing "inlined call frames" pushes, - // that must be recreated to avoid ending up with dangling pops - // - // FIXME(eddyb) this only collects to avoid borrow conflicts, - // between e.g. `enclosing_inlined_frames` and mutating `insts`, - // but also between different uses of `self`. - let all_pops_after_vars: SmallVec<[_; 8]> = enclosing_inlined_frames - .iter() - .map(|_| custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)) - .collect(); - let all_repushes_before_non_vars: SmallVec<[_; 8]> = - (enclosing_inlined_frames.into_iter().flat_map( - |(callsite_debug_src_loc_inst, push_inlined_call_frame_inst)| { - (callsite_debug_src_loc_inst.into_iter()) - .chain([push_inlined_call_frame_inst]) - }, - )) - .chain(current_debug_src_loc_inst) - .map(|inst| instantiate_debuginfo(self, inst)) - .collect(); - - let vars_and_debuginfo = - insts.splice(vars_and_debuginfo_range, all_repushes_before_non_vars); - let repaired_vars_and_debuginfo = vars_and_debuginfo.chain(all_pops_after_vars); - - // FIXME(eddyb) collecting shouldn't be necessary but this is - // nested in a closure, and `splice` borrows the original `Vec`. 
- repaired_vars_and_debuginfo.collect::>() + }); + vars_and_debuginfo }; let [mut inlined_callee_entry_block]: [_; 1] = inlined_callee_blocks.try_into().unwrap(); // Move the `OpVariable`s of the callee to the caller. - let callee_vars_and_debuginfo = - steal_vars(&mut inlined_callee_entry_block.instructions); - self.insert_opvariables(&mut caller.blocks[0], callee_vars_and_debuginfo); + insert_opvariables( + &mut caller.blocks[0], + steal_vars(&mut inlined_callee_entry_block.instructions), + ); caller.blocks[pre_call_block_idx] .instructions @@ -1051,52 +990,15 @@ impl Inliner<'_, '_> { caller_restore_debuginfo_after_call, ) } +} - fn insert_opvariables(&self, block: &mut Block, insts: impl IntoIterator) { - // HACK(eddyb) this isn't as efficient as it could be in theory, but it's - // very important to make sure sure to never insert new instructions in - // the middle of debuginfo (as it would be affected by it). - let mut inlined_frames_depth = 0usize; - let mut outermost_has_debug_src_loc = false; - let mut last_debugless_var_insertion_point_candidate = None; - for (i, inst) in block.instructions.iter().enumerate() { - last_debugless_var_insertion_point_candidate = - (inlined_frames_depth == 0 && !outermost_has_debug_src_loc).then_some(i); - - let changed_has_debug_src_loc = match inst.class.opcode { - Op::Line => true, - Op::NoLine => false, - Op::ExtInst - if inst.operands[0].unwrap_id_ref() == self.custom_ext_inst_set_import => - { - match CustomOp::decode_from_ext_inst(inst) { - CustomOp::SetDebugSrcLoc => true, - CustomOp::ClearDebugSrcLoc => false, - CustomOp::PushInlinedCallFrame => { - inlined_frames_depth += 1; - continue; - } - CustomOp::PopInlinedCallFrame => { - inlined_frames_depth = inlined_frames_depth.saturating_sub(1); - continue; - } - CustomOp::Abort => break, - } - } - Op::Variable => continue, - _ => break, - }; - - if inlined_frames_depth == 0 { - outermost_has_debug_src_loc = changed_has_debug_src_loc; - } - } - - // HACK(eddyb) fallback to inserting at the start, which should be correct. - // FIXME(eddyb) some level of debuginfo repair could prevent needing this. - let i = last_debugless_var_insertion_point_candidate.unwrap_or(0); - block.instructions.splice(i..i, insts); - } +fn insert_opvariables(block: &mut Block, insts: impl IntoIterator) { + let first_non_variable = block + .instructions + .iter() + .position(|inst| inst.class.opcode != Op::Variable); + let i = first_non_variable.unwrap_or(block.instructions.len()); + block.instructions.splice(i..i, insts); } fn fuse_trivial_branches(function: &mut Function) { @@ -1131,7 +1033,7 @@ fn fuse_trivial_branches(function: &mut Function) { }; let pred_insts = &function.blocks[pred].instructions; if pred_insts.last().unwrap().class.opcode == Op::Branch { - let mut dest_insts = mem::take(&mut function.blocks[dest_block].instructions); + let mut dest_insts = take(&mut function.blocks[dest_block].instructions); let pred_insts = &mut function.blocks[pred].instructions; pred_insts.pop(); // pop the branch pred_insts.append(&mut dest_insts); From 95ee5bc1d7bdbe79953b89b5ce7c98550f4b56cf Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 09/23] [2024] linker/inline: fix `OpVariable` debuginfo collection and insertion. 
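
(For illustration only: the sketch below is a self-contained toy model of the
insertion-point rule this patch restores; the `Inst` enum, the example
`prologue`, and `main` are made up for this sketch and are not rspirv's
`Instruction`/`CustomOp` types. It only shows the core idea: hoisted
`OpVariable`s must not be spliced in while a source-location marker is
active, or they would inherit debuginfo that does not describe them.)

    // Toy stand-ins for `OpLine`/`SetDebugSrcLoc`, `OpNoLine`/`ClearDebugSrcLoc`,
    // `OpVariable`, and the first non-prologue instruction.
    enum Inst {
        SetLoc,
        ClearLoc,
        Var,
        Other,
    }

    // Mirrors the idea behind `insert_opvariables` (minus inlined-frame depth
    // tracking): remember the last index reached while *no* source location
    // was active, falling back to the block start (index 0) otherwise.
    fn debugless_var_insertion_point(prologue: &[Inst]) -> usize {
        let mut candidate = 0;
        let mut has_loc = false;
        for (i, inst) in prologue.iter().enumerate() {
            if !has_loc {
                candidate = i;
            }
            match inst {
                Inst::SetLoc => has_loc = true,
                Inst::ClearLoc => has_loc = false,
                Inst::Var => {}
                Inst::Other => break,
            }
        }
        candidate
    }

    fn main() {
        use Inst::*;
        let prologue = [SetLoc, Var, ClearLoc, Var, Other];
        let i = debugless_var_insertion_point(&prologue);
        // Index 4 sits after `ClearLoc` (no active source location) and still
        // within the variable/debuginfo prologue, so splicing there is safe.
        assert_eq!(i, 4);
        println!("hoisted `OpVariable`s go at index {i}");
    }
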
--- .../rustc_codegen_spirv/src/linker/inline.rs | 202 +++++++++++++----- 1 file changed, 150 insertions(+), 52 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index a1892875ab..1e8ddbc5b5 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -15,7 +15,7 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_errors::ErrorGuaranteed; use rustc_session::Session; use smallvec::SmallVec; -use std::mem::{self, take}; +use std::mem; type FunctionMap = FxHashMap; @@ -658,15 +658,13 @@ impl Inliner<'_, '_> { if let Some(call_result_type) = call_result_type { // Generate the storage space for the return value: Do this *after* the split above, // because if block_idx=0, inserting a variable here shifts call_index. - insert_opvariables( - &mut caller.blocks[0], - [Instruction::new( - Op::Variable, - Some(self.ptr_ty(call_result_type)), - Some(return_variable.unwrap()), - vec![Operand::StorageClass(StorageClass::Function)], - )], + let ret_var_inst = Instruction::new( + Op::Variable, + Some(self.ptr_ty(call_result_type)), + Some(return_variable.unwrap()), + vec![Operand::StorageClass(StorageClass::Function)], ); + self.insert_opvariables(&mut caller.blocks[0], [ret_var_inst]); } // Insert non-entry inlined callee blocks just after the pre-call block. @@ -712,52 +710,115 @@ impl Inliner<'_, '_> { // Fuse the inlined callee entry block into the pre-call block. // This is okay because it's illegal to branch to the first BB in a function. { + // NOTE(eddyb) `OpExtInst`s have a result ID, even if unused, and + // it has to be unique, so this allocates new IDs as-needed. + let instantiate_debuginfo = |this: &mut Self, inst: &Instruction| { + let mut inst = inst.clone(); + if let Some(id) = &mut inst.result_id { + *id = this.id(); + } + inst + }; + + let custom_inst_to_inst = |this: &mut Self, inst: CustomInst<_>| { + Instruction::new( + Op::ExtInst, + Some(this.op_type_void_id), + Some(this.id()), + [ + Operand::IdRef(this.custom_ext_inst_set_import), + Operand::LiteralExtInstInteger(inst.op() as u32), + ] + .into_iter() + .chain(inst.into_operands()) + .collect(), + ) + }; + // Return the subsequence of `insts` made from `OpVariable`s, and any // debuginfo instructions (which may apply to them), while removing // *only* `OpVariable`s from `insts` (and keeping debuginfo in both). let mut steal_vars = |insts: &mut Vec| { - let mut vars_and_debuginfo = vec![]; - insts.retain_mut(|inst| { - let is_debuginfo = match inst.class.opcode { - Op::Line | Op::NoLine => true, - Op::ExtInst => { - inst.operands[0].unwrap_id_ref() == self.custom_ext_inst_set_import - && CustomOp::decode_from_ext_inst(inst).is_debuginfo() - } - _ => false, - }; - if is_debuginfo { - // NOTE(eddyb) `OpExtInst`s have a result ID, - // even if unused, and it has to be unique. - let mut inst = inst.clone(); - if let Some(id) = &mut inst.result_id { - *id = self.id(); + // HACK(eddyb) this duplicates some code from `get_inlined_blocks`, + // but that will be removed once the inliner is refactored to be + // inside-out instead of outside-in (already finished in a branch). 
+ let mut enclosing_inlined_frames = SmallVec::<[_; 8]>::new(); + let mut current_debug_src_loc_inst = None; + let mut vars_and_debuginfo_range = 0..0; + while vars_and_debuginfo_range.end < insts.len() { + let inst = &insts[vars_and_debuginfo_range.end]; + match inst.class.opcode { + Op::Line => current_debug_src_loc_inst = Some(inst), + Op::NoLine => current_debug_src_loc_inst = None, + Op::ExtInst + if inst.operands[0].unwrap_id_ref() + == self.custom_ext_inst_set_import => + { + match CustomOp::decode_from_ext_inst(inst) { + CustomOp::SetDebugSrcLoc => current_debug_src_loc_inst = Some(inst), + CustomOp::ClearDebugSrcLoc => current_debug_src_loc_inst = None, + CustomOp::PushInlinedCallFrame => { + enclosing_inlined_frames + .push((current_debug_src_loc_inst.take(), inst)); + } + CustomOp::PopInlinedCallFrame => { + if let Some((callsite_debug_src_loc_inst, _)) = + enclosing_inlined_frames.pop() + { + current_debug_src_loc_inst = callsite_debug_src_loc_inst; + } + } + CustomOp::Abort => break, + } } - vars_and_debuginfo.push(inst); - true - } else if inst.class.opcode == Op::Variable { - // HACK(eddyb) we're removing this `Instruction` from - // `inst`, so it doesn't really matter what we use here. - vars_and_debuginfo.push(mem::replace( - inst, - Instruction::new(Op::Nop, None, None, vec![]), - )); - false - } else { - true + Op::Variable => {} + _ => break, } - }); - vars_and_debuginfo + vars_and_debuginfo_range.end += 1; + } + + // `vars_and_debuginfo_range.end` indicates where `OpVariable`s + // end and other instructions start (module debuginfo), but to + // split the block in two, both sides of the "cut" need "repair": + // - the variables are missing "inlined call frames" pops, that + // may happen later in the block, and have to be synthesized + // - the non-variables are missing "inlined call frames" pushes, + // that must be recreated to avoid ending up with dangling pops + // + // FIXME(eddyb) this only collects to avoid borrow conflicts, + // between e.g. `enclosing_inlined_frames` and mutating `insts`, + // but also between different uses of `self`. + let all_pops_after_vars: SmallVec<[_; 8]> = enclosing_inlined_frames + .iter() + .map(|_| custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)) + .collect(); + let all_repushes_before_non_vars: SmallVec<[_; 8]> = + (enclosing_inlined_frames.into_iter().flat_map( + |(callsite_debug_src_loc_inst, push_inlined_call_frame_inst)| { + (callsite_debug_src_loc_inst.into_iter()) + .chain([push_inlined_call_frame_inst]) + }, + )) + .chain(current_debug_src_loc_inst) + .map(|inst| instantiate_debuginfo(self, inst)) + .collect(); + + let vars_and_debuginfo = + insts.splice(vars_and_debuginfo_range, all_repushes_before_non_vars); + let repaired_vars_and_debuginfo = vars_and_debuginfo.chain(all_pops_after_vars); + + // FIXME(eddyb) collecting shouldn't be necessary but this is + // nested in a closure, and `splice` borrows the original `Vec`. + repaired_vars_and_debuginfo.collect::>() }; let [mut inlined_callee_entry_block]: [_; 1] = inlined_callee_blocks.try_into().unwrap(); // Move the `OpVariable`s of the callee to the caller. 
- insert_opvariables( - &mut caller.blocks[0], - steal_vars(&mut inlined_callee_entry_block.instructions), - ); + let callee_vars_and_debuginfo = + steal_vars(&mut inlined_callee_entry_block.instructions); + self.insert_opvariables(&mut caller.blocks[0], callee_vars_and_debuginfo); caller.blocks[pre_call_block_idx] .instructions @@ -990,15 +1051,52 @@ impl Inliner<'_, '_> { caller_restore_debuginfo_after_call, ) } -} -fn insert_opvariables(block: &mut Block, insts: impl IntoIterator) { - let first_non_variable = block - .instructions - .iter() - .position(|inst| inst.class.opcode != Op::Variable); - let i = first_non_variable.unwrap_or(block.instructions.len()); - block.instructions.splice(i..i, insts); + fn insert_opvariables(&self, block: &mut Block, insts: impl IntoIterator) { + // HACK(eddyb) this isn't as efficient as it could be in theory, but it's + // very important to make sure sure to never insert new instructions in + // the middle of debuginfo (as it would be affected by it). + let mut inlined_frames_depth = 0usize; + let mut outermost_has_debug_src_loc = false; + let mut last_debugless_var_insertion_point_candidate = None; + for (i, inst) in block.instructions.iter().enumerate() { + last_debugless_var_insertion_point_candidate = + (inlined_frames_depth == 0 && !outermost_has_debug_src_loc).then_some(i); + + let changed_has_debug_src_loc = match inst.class.opcode { + Op::Line => true, + Op::NoLine => false, + Op::ExtInst + if inst.operands[0].unwrap_id_ref() == self.custom_ext_inst_set_import => + { + match CustomOp::decode_from_ext_inst(inst) { + CustomOp::SetDebugSrcLoc => true, + CustomOp::ClearDebugSrcLoc => false, + CustomOp::PushInlinedCallFrame => { + inlined_frames_depth += 1; + continue; + } + CustomOp::PopInlinedCallFrame => { + inlined_frames_depth = inlined_frames_depth.saturating_sub(1); + continue; + } + CustomOp::Abort => break, + } + } + Op::Variable => continue, + _ => break, + }; + + if inlined_frames_depth == 0 { + outermost_has_debug_src_loc = changed_has_debug_src_loc; + } + } + + // HACK(eddyb) fallback to inserting at the start, which should be correct. + // FIXME(eddyb) some level of debuginfo repair could prevent needing this. + let i = last_debugless_var_insertion_point_candidate.unwrap_or(0); + block.instructions.splice(i..i, insts); + } } fn fuse_trivial_branches(function: &mut Function) { @@ -1033,7 +1131,7 @@ fn fuse_trivial_branches(function: &mut Function) { }; let pred_insts = &function.blocks[pred].instructions; if pred_insts.last().unwrap().class.opcode == Op::Branch { - let mut dest_insts = take(&mut function.blocks[dest_block].instructions); + let mut dest_insts = mem::take(&mut function.blocks[dest_block].instructions); let pred_insts = &mut function.blocks[pred].instructions; pred_insts.pop(); // pop the branch pred_insts.append(&mut dest_insts); From fdd29c1b91bf3c3a31168e7ceeaf23c9b6ae5760 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 10/23] [2024] linker/inline: use bottom-up inlining to minimize redundancy. 
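
(For illustration only: the sketch below is a self-contained toy model of why
post-order, i.e. bottom-up, inlining avoids redundant work; the `Inst` enum,
the hard-coded post-order, and `main` are made up for this sketch and are not
rust-gpu's actual call-graph or rspirv types.)

    #[derive(Clone)]
    enum Inst {
        Op(&'static str),
        Call(usize), // index of the callee in `funcs`
    }

    fn main() {
        // `main` (0) calls `helper` (1) twice; `helper` calls `leaf` (2).
        let mut funcs: Vec<Vec<Inst>> = vec![
            vec![Inst::Call(1), Inst::Op("store"), Inst::Call(1)],
            vec![Inst::Op("mul"), Inst::Call(2)],
            vec![Inst::Op("add")],
        ];
        // Hard-coded post-order (callees before callers): leaf, helper, main.
        // Because each callee is already fully flattened by the time any of
        // its callers is visited, every call site is expanded exactly once.
        for idx in [2usize, 1, 0] {
            let body = std::mem::take(&mut funcs[idx]);
            let inlined: Vec<Inst> = body
                .into_iter()
                .flat_map(|inst| match inst {
                    Inst::Call(callee) => funcs[callee].clone(),
                    other => vec![other],
                })
                .collect();
            funcs[idx] = inlined;
        }
        // `main` is now ["mul", "add", "store", "mul", "add"], with no calls left.
        for inst in &funcs[0] {
            match inst {
                Inst::Op(name) => println!("{name}"),
                Inst::Call(_) => unreachable!("no calls remain after inlining"),
            }
        }
    }
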
--- .../rustc_codegen_spirv/src/linker/inline.rs | 448 +++++++++--------- crates/rustc_codegen_spirv/src/linker/ipo.rs | 16 +- .../ui/dis/panic_builtin_bounds_check.stderr | 3 +- .../ui/lang/consts/nested-ref.stderr | 18 +- .../core/ref/member_ref_arg-broken.stderr | 38 +- .../ui/lang/core/ref/member_ref_arg.stderr | 20 + .../ui/lang/panic/track_caller.stderr | 11 + 7 files changed, 311 insertions(+), 243 deletions(-) create mode 100644 tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr create mode 100644 tests/compiletests/ui/lang/panic/track_caller.stderr diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 1e8ddbc5b5..e2aac8c548 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -8,17 +8,17 @@ use super::apply_rewrite_rules; use super::ipo::CallGraph; use super::simple_passes::outgoing_edges; use super::{get_name, get_names}; +use crate::custom_decorations::SpanRegenerator; use crate::custom_insts::{self, CustomInst, CustomOp}; use rspirv::dr::{Block, Function, Instruction, Module, ModuleHeader, Operand}; use rspirv::spirv::{FunctionControl, Op, StorageClass, Word}; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap, FxIndexSet}; use rustc_errors::ErrorGuaranteed; use rustc_session::Session; use smallvec::SmallVec; +use std::cmp::Ordering; use std::mem; -type FunctionMap = FxHashMap; - // FIXME(eddyb) this is a bit silly, but this keeps being repeated everywhere. fn next_id(header: &mut ModuleHeader) -> Word { let result = header.bound; @@ -30,6 +30,9 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { // This algorithm gets real sad if there's recursion - but, good news, SPIR-V bans recursion deny_recursion_in_module(sess, module)?; + // Compute the call-graph that will drive (inside-out, aka bottom-up) inlining. + let (call_graph, func_id_to_idx) = CallGraph::collect_with_func_id_to_idx(module); + let custom_ext_inst_set_import = module .ext_inst_imports .iter() @@ -39,92 +42,10 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { }) .map(|inst| inst.result_id.unwrap()); - // HACK(eddyb) compute the set of functions that may `Abort` *transitively*, - // which is only needed because of how we inline (sometimes it's outside-in, - // aka top-down, instead of always being inside-out, aka bottom-up). - // - // (inlining is needed in the first place because our custom `Abort` - // instructions get lowered to a simple `OpReturn` in entry-points, but - // that requires that they get inlined all the way up to the entry-points) - let functions_that_may_abort = custom_ext_inst_set_import - .map(|custom_ext_inst_set_import| { - let mut may_abort_by_id = FxHashSet::default(); - - // FIXME(eddyb) use this `CallGraph` abstraction more during inlining. - let call_graph = CallGraph::collect(module); - for func_idx in call_graph.post_order() { - let func_id = module.functions[func_idx].def_id().unwrap(); - - let any_callee_may_abort = call_graph.callees[func_idx].iter().any(|&callee_idx| { - may_abort_by_id.contains(&module.functions[callee_idx].def_id().unwrap()) - }); - if any_callee_may_abort { - may_abort_by_id.insert(func_id); - continue; - } - - let may_abort_directly = module.functions[func_idx].blocks.iter().any(|block| { - match &block.instructions[..] 
{ - [.., last_normal_inst, terminator_inst] - if last_normal_inst.class.opcode == Op::ExtInst - && last_normal_inst.operands[0].unwrap_id_ref() - == custom_ext_inst_set_import - && CustomOp::decode_from_ext_inst(last_normal_inst) - == CustomOp::Abort => - { - assert_eq!(terminator_inst.class.opcode, Op::Unreachable); - true - } - - _ => false, - } - }); - if may_abort_directly { - may_abort_by_id.insert(func_id); - } - } - - may_abort_by_id - }) - .unwrap_or_default(); - - let functions = module - .functions - .iter() - .map(|f| (f.def_id().unwrap(), f.clone())) - .collect(); let legal_globals = LegalGlobal::gather_from_module(module); - // Drop all the functions we'll be inlining. (This also means we won't waste time processing - // inlines in functions that will get inlined) - let mut dropped_ids = FxHashSet::default(); - let mut inlined_to_legalize_dont_inlines = Vec::new(); - module.functions.retain(|f| { - let should_inline_f = should_inline(&legal_globals, &functions_that_may_abort, f, None); - if should_inline_f != Ok(false) { - if should_inline_f == Err(MustInlineToLegalize) && has_dont_inline(f) { - inlined_to_legalize_dont_inlines.push(f.def_id().unwrap()); - } - // TODO: We should insert all defined IDs in this function. - dropped_ids.insert(f.def_id().unwrap()); - false - } else { - true - } - }); - - if !inlined_to_legalize_dont_inlines.is_empty() { - let names = get_names(module); - for f in inlined_to_legalize_dont_inlines { - sess.dcx().warn(format!( - "`#[inline(never)]` function `{}` needs to be inlined \ - because it has illegal argument or return types", - get_name(&names, f) - )); - } - } - let header = module.header.as_mut().unwrap(); + // FIXME(eddyb) clippy false positive (separate `map` required for borrowck). #[allow(clippy::map_unwrap_or)] let mut inliner = Inliner { @@ -154,6 +75,8 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { id }), + func_id_to_idx, + id_to_name: module .debug_names .iter() @@ -173,22 +96,101 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { annotations: &mut module.annotations, types_global_values: &mut module.types_global_values, - functions: &functions, - legal_globals: &legal_globals, - functions_that_may_abort: &functions_that_may_abort, + legal_globals, + + // NOTE(eddyb) this is needed because our custom `Abort` instructions get + // lowered to a simple `OpReturn` in entry-points, but that requires that + // they get inlined all the way up to the entry-points in the first place. + functions_that_may_abort: module + .functions + .iter() + .filter_map(|func| { + let custom_ext_inst_set_import = custom_ext_inst_set_import?; + func.blocks + .iter() + .any(|block| match &block.instructions[..] 
{ + [.., last_normal_inst, terminator_inst] + if last_normal_inst.class.opcode == Op::ExtInst + && last_normal_inst.operands[0].unwrap_id_ref() + == custom_ext_inst_set_import + && CustomOp::decode_from_ext_inst(last_normal_inst) + == CustomOp::Abort => + { + assert_eq!(terminator_inst.class.opcode, Op::Unreachable); + true + } + + _ => false, + }) + .then_some(func.def_id().unwrap()) + }) + .collect(), + + inlined_dont_inlines_to_cause_and_callers: FxIndexMap::default(), }; - for function in &mut module.functions { - inliner.inline_fn(function); - fuse_trivial_branches(function); + + let mut functions: Vec<_> = mem::take(&mut module.functions) + .into_iter() + .map(Ok) + .collect(); + + // Inline functions in post-order (aka inside-out aka bottom-out) - that is, + // callees are processed before their callers, to avoid duplicating work. + for func_idx in call_graph.post_order() { + let mut function = mem::replace(&mut functions[func_idx], Err(FuncIsBeingInlined)).unwrap(); + inliner.inline_fn(&mut function, &functions); + fuse_trivial_branches(&mut function); + functions[func_idx] = Ok(function); } - // Drop OpName etc. for inlined functions - module.debug_names.retain(|inst| { - !inst - .operands - .iter() - .any(|op| op.id_ref_any().is_some_and(|id| dropped_ids.contains(&id))) - }); + module.functions = functions.into_iter().map(|func| func.unwrap()).collect(); + + let Inliner { + id_to_name, + inlined_dont_inlines_to_cause_and_callers, + .. + } = inliner; + + let mut span_regen = SpanRegenerator::new(sess.source_map(), module); + for (callee_id, (cause, callers)) in inlined_dont_inlines_to_cause_and_callers { + let callee_name = get_name(&id_to_name, callee_id); + + // HACK(eddyb) `libcore` hides panics behind `#[inline(never)]` `fn`s, + // making this too noisy and useless (since it's an impl detail). + if cause == "panicking" && callee_name.starts_with("core::") { + continue; + } + + let callee_span = span_regen + .src_loc_for_id(callee_id) + .and_then(|src_loc| span_regen.src_loc_to_rustc(src_loc)) + .unwrap_or_default(); + sess.dcx() + .struct_span_warn( + callee_span, + format!("`#[inline(never)]` function `{callee_name}` has been inlined"), + ) + .with_note(format!("inlining was required due to {cause}")) + .with_note(format!( + "called from {}", + callers + .iter() + .enumerate() + .filter_map(|(i, &caller_id)| { + // HACK(eddyb) avoid showing too many names. + match i.cmp(&4) { + Ordering::Less => { + Some(format!("`{}`", get_name(&id_to_name, caller_id))) + } + Ordering::Equal => Some(format!("and {} more", callers.len() - i)), + Ordering::Greater => None, + } + }) + .collect::>() + .join(", ") + )) + .emit(); + } Ok(()) } @@ -371,42 +373,42 @@ fn has_dont_inline(function: &Function) -> bool { /// Helper error type for `should_inline` (see its doc comment). #[derive(Copy, Clone, PartialEq, Eq)] -struct MustInlineToLegalize; +struct MustInlineToLegalize(&'static str); -/// Returns `Ok(true)`/`Err(MustInlineToLegalize)` if `callee` should/must be +/// Returns `Ok(true)`/`Err(MustInlineToLegalize(_))` if `callee` should/must be /// inlined (either in general, or specifically from `call_site`, if provided). /// -/// The distinction made is that `Err(MustInlineToLegalize)` is not a heuristic, -/// and inlining is *mandatory* due to an illegal signature/arguments. 
+/// The distinction made here is that `Err(MustInlineToLegalize(cause))` is +/// very much *not* a heuristic, and inlining is *mandatory* due to `cause` +/// (usually illegal signature/arguments, but also the panicking mechanism). +// +// FIXME(eddyb) the causes here are not fine-grained enough. fn should_inline( legal_globals: &FxHashMap, functions_that_may_abort: &FxHashSet, callee: &Function, - call_site: Option>, + call_site: CallSite<'_>, ) -> Result { let callee_def = callee.def.as_ref().unwrap(); let callee_control = callee_def.operands[0].unwrap_function_control(); - // HACK(eddyb) this "has a call-site" check ensures entry-points don't get - // accidentally removed as "must inline to legalize" function, but can still - // be inlined into other entry-points (if such an unusual situation arises). - if call_site.is_some() && functions_that_may_abort.contains(&callee.def_id().unwrap()) { - return Err(MustInlineToLegalize); + if functions_that_may_abort.contains(&callee.def_id().unwrap()) { + return Err(MustInlineToLegalize("panicking")); } let ret_ty = legal_globals .get(&callee_def.result_type.unwrap()) - .ok_or(MustInlineToLegalize)?; + .ok_or(MustInlineToLegalize("illegal return type"))?; if !ret_ty.legal_as_fn_ret_ty() { - return Err(MustInlineToLegalize); + return Err(MustInlineToLegalize("illegal (pointer) return type")); } for (i, param) in callee.parameters.iter().enumerate() { let param_ty = legal_globals .get(param.result_type.as_ref().unwrap()) - .ok_or(MustInlineToLegalize)?; + .ok_or(MustInlineToLegalize("illegal parameter type"))?; if !param_ty.legal_as_fn_param_ty() { - return Err(MustInlineToLegalize); + return Err(MustInlineToLegalize("illegal (pointer) parameter type")); } // If the call isn't passing a legal pointer argument (a "memory object", @@ -414,13 +416,13 @@ fn should_inline( // then inlining is required to have a chance at producing legal SPIR-V. // // FIXME(eddyb) rewriting away the pointer could be another alternative. - if let (LegalGlobal::TypePointer(_), Some(call_site)) = (param_ty, call_site) { + if let LegalGlobal::TypePointer(_) = param_ty { let ptr_arg = call_site.call_inst.operands[i + 1].unwrap_id_ref(); match legal_globals.get(&ptr_arg) { Some(LegalGlobal::Variable) => {} // FIXME(eddyb) should some constants (undef/null) be allowed? - Some(_) => return Err(MustInlineToLegalize), + Some(_) => return Err(MustInlineToLegalize("illegal (pointer) argument")), None => { let mut caller_param_and_var_ids = call_site @@ -446,7 +448,7 @@ fn should_inline( .map(|caller_inst| caller_inst.result_id.unwrap()); if !caller_param_and_var_ids.any(|id| ptr_arg == id) { - return Err(MustInlineToLegalize); + return Err(MustInlineToLegalize("illegal (pointer) argument")); } } } @@ -456,38 +458,46 @@ fn should_inline( Ok(callee_control.contains(FunctionControl::INLINE)) } +/// Helper error type for `Inliner`'s `functions` field, indicating a `Function` +/// was taken out of its slot because it's being inlined. +#[derive(Debug)] +struct FuncIsBeingInlined; + // Steps: // Move OpVariable decls // Rewrite return // Renumber IDs // Insert blocks -struct Inliner<'m, 'map> { +struct Inliner<'a, 'b> { /// ID of `OpExtInstImport` for our custom "extended instruction set" /// (see `crate::custom_insts` for more details). custom_ext_inst_set_import: Word, op_type_void_id: Word, + /// Map from each function's ID to its index in `functions`. 
+ func_id_to_idx: FxHashMap, + /// Pre-collected `OpName`s, that can be used to find any function's name /// during inlining (to be able to generate debuginfo that uses names). - id_to_name: FxHashMap, + id_to_name: FxHashMap, /// `OpString` cache (for deduplicating `OpString`s for the same string). // // FIXME(eddyb) currently this doesn't reuse existing `OpString`s, but since // this is mostly for inlined callee names, it's expected almost no overlap // exists between existing `OpString`s and new ones, anyway. - cached_op_strings: FxHashMap<&'m str, Word>, + cached_op_strings: FxHashMap<&'a str, Word>, - header: &'m mut ModuleHeader, - debug_string_source: &'m mut Vec, - annotations: &'m mut Vec, - types_global_values: &'m mut Vec, + header: &'b mut ModuleHeader, + debug_string_source: &'b mut Vec, + annotations: &'b mut Vec, + types_global_values: &'b mut Vec, - functions: &'map FunctionMap, - legal_globals: &'map FxHashMap, - functions_that_may_abort: &'map FxHashSet, + legal_globals: FxHashMap, + functions_that_may_abort: FxHashSet, + inlined_dont_inlines_to_cause_and_callers: FxIndexMap)>, // rewrite_rules: FxHashMap, } @@ -536,19 +546,29 @@ impl Inliner<'_, '_> { inst_id } - fn inline_fn(&mut self, function: &mut Function) { + fn inline_fn( + &mut self, + function: &mut Function, + functions: &[Result], + ) { let mut block_idx = 0; while block_idx < function.blocks.len() { // If we successfully inlined a block, then repeat processing on the same block, in // case the newly inlined block has more inlined calls. // TODO: This is quadratic - if !self.inline_block(function, block_idx) { + if !self.inline_block(function, block_idx, functions) { + // TODO(eddyb) skip past the inlined callee without rescanning it. block_idx += 1; } } } - fn inline_block(&mut self, caller: &mut Function, block_idx: usize) -> bool { + fn inline_block( + &mut self, + caller: &mut Function, + block_idx: usize, + functions: &[Result], + ) -> bool { // Find the first inlined OpFunctionCall let call = caller.blocks[block_idx] .instructions @@ -559,8 +579,8 @@ impl Inliner<'_, '_> { ( index, inst, - self.functions - .get(&inst.operands[0].id_ref_any().unwrap()) + functions[self.func_id_to_idx[&inst.operands[0].id_ref_any().unwrap()]] + .as_ref() .unwrap(), ) }) @@ -570,19 +590,38 @@ impl Inliner<'_, '_> { call_inst: inst, }; match should_inline( - self.legal_globals, - self.functions_that_may_abort, + &self.legal_globals, + &self.functions_that_may_abort, f, - Some(call_site), + call_site, ) { Ok(inline) => inline, - Err(MustInlineToLegalize) => true, + Err(MustInlineToLegalize(cause)) => { + if has_dont_inline(f) { + self.inlined_dont_inlines_to_cause_and_callers + .entry(f.def_id().unwrap()) + .or_insert_with(|| (cause, Default::default())) + .1 + .insert(caller.def_id().unwrap()); + } + true + } } }); let (call_index, call_inst, callee) = match call { None => return false, Some(call) => call, }; + + // Propagate "may abort" from callee to caller (i.e. as aborts get inlined). + if self + .functions_that_may_abort + .contains(&callee.def_id().unwrap()) + { + self.functions_that_may_abort + .insert(caller.def_id().unwrap()); + } + let call_result_type = { let ty = call_inst.result_type.unwrap(); if ty == self.op_type_void_id { @@ -593,17 +632,28 @@ impl Inliner<'_, '_> { }; let call_result_id = call_inst.result_id.unwrap(); - // Get the debuginfo instructions that apply to the call. + // Get the debug "source location" instruction that applies to the call. 
let custom_ext_inst_set_import = self.custom_ext_inst_set_import; - let call_debug_insts = caller.blocks[block_idx].instructions[..call_index] + let call_debug_src_loc_inst = caller.blocks[block_idx].instructions[..call_index] .iter() - .filter(|inst| match inst.class.opcode { - Op::Line | Op::NoLine => true, - Op::ExtInst if inst.operands[0].unwrap_id_ref() == custom_ext_inst_set_import => { - CustomOp::decode_from_ext_inst(inst).is_debuginfo() - } - _ => false, - }); + .rev() + .find_map(|inst| { + Some(match inst.class.opcode { + Op::Line => Some(inst), + Op::NoLine => None, + Op::ExtInst + if inst.operands[0].unwrap_id_ref() == custom_ext_inst_set_import => + { + match CustomOp::decode_from_ext_inst(inst) { + CustomOp::SetDebugSrcLoc => Some(inst), + CustomOp::ClearDebugSrcLoc => None, + _ => return None, + } + } + _ => return None, + }) + }) + .flatten(); // Rewrite parameters to arguments let call_arguments = call_inst @@ -624,9 +674,12 @@ impl Inliner<'_, '_> { }; let return_jump = self.id(); // Rewrite OpReturns of the callee. - #[allow(clippy::needless_borrow)] - let (mut inlined_callee_blocks, extra_debug_insts_pre_call, extra_debug_insts_post_call) = - self.get_inlined_blocks(&callee, call_debug_insts, return_variable, return_jump); + let mut inlined_callee_blocks = self.get_inlined_blocks( + callee, + call_debug_src_loc_inst, + return_variable, + return_jump, + ); // Clone the IDs of the callee, because otherwise they'd be defined multiple times if the // fn is inlined multiple times. self.add_clone_id_rules(&mut rewrite_rules, &inlined_callee_blocks); @@ -648,13 +701,6 @@ impl Inliner<'_, '_> { .unwrap(); assert!(call.class.opcode == Op::FunctionCall); - // HACK(eddyb) inject the additional debuginfo instructions generated by - // `get_inlined_blocks`, so the inlined call frame "stack" isn't corrupted. - caller.blocks[pre_call_block_idx] - .instructions - .extend(extra_debug_insts_pre_call); - post_call_block_insts.splice(0..0, extra_debug_insts_post_call); - if let Some(call_result_type) = call_result_type { // Generate the storage space for the return value: Do this *after* the split above, // because if block_idx=0, inserting a variable here shifts call_index. @@ -849,58 +895,19 @@ impl Inliner<'_, '_> { } } - // HACK(eddyb) the second and third return values are additional debuginfo - // instructions that need to be inserted just before/after the callsite. - fn get_inlined_blocks<'a>( + fn get_inlined_blocks( &mut self, callee: &Function, - call_debug_insts: impl Iterator, + call_debug_src_loc_inst: Option<&Instruction>, return_variable: Option, return_jump: Word, - ) -> ( - Vec, - SmallVec<[Instruction; 8]>, - SmallVec<[Instruction; 8]>, - ) { + ) -> Vec { let Self { custom_ext_inst_set_import, op_type_void_id, .. } = *self; - // HACK(eddyb) this is terrible, but we have to deal with it because of - // how this inliner is outside-in, instead of inside-out, meaning that - // context builds up "outside" of the callee blocks, inside the caller. 
- let mut enclosing_inlined_frames = SmallVec::<[_; 8]>::new(); - let mut current_debug_src_loc_inst = None; - for inst in call_debug_insts { - match inst.class.opcode { - Op::Line => current_debug_src_loc_inst = Some(inst), - Op::NoLine => current_debug_src_loc_inst = None, - Op::ExtInst - if inst.operands[0].unwrap_id_ref() == self.custom_ext_inst_set_import => - { - match CustomOp::decode_from_ext_inst(inst) { - CustomOp::SetDebugSrcLoc => current_debug_src_loc_inst = Some(inst), - CustomOp::ClearDebugSrcLoc => current_debug_src_loc_inst = None, - CustomOp::PushInlinedCallFrame => { - enclosing_inlined_frames - .push((current_debug_src_loc_inst.take(), inst)); - } - CustomOp::PopInlinedCallFrame => { - if let Some((callsite_debug_src_loc_inst, _)) = - enclosing_inlined_frames.pop() - { - current_debug_src_loc_inst = callsite_debug_src_loc_inst; - } - } - CustomOp::Abort => {} - } - } - _ => {} - } - } - // Prepare the debuginfo insts to prepend/append to every block. // FIXME(eddyb) this could be more efficient if we only used one pair of // `{Push,Pop}InlinedCallFrame` for the whole inlined callee, but there @@ -925,7 +932,7 @@ impl Inliner<'_, '_> { )); id }); - let mut mk_debuginfo_prefix_and_suffix = |include_callee_frame| { + let mut mk_debuginfo_prefix_and_suffix = || { // NOTE(eddyb) `OpExtInst`s have a result ID, even if unused, and // it has to be unique (same goes for the other instructions below). let instantiate_debuginfo = |this: &mut Self, inst: &Instruction| { @@ -949,33 +956,18 @@ impl Inliner<'_, '_> { .collect(), ) }; - // FIXME(eddyb) this only allocates to avoid borrow conflicts. - let mut prefix = SmallVec::<[_; 8]>::new(); - let mut suffix = SmallVec::<[_; 8]>::new(); - for &(callsite_debug_src_loc_inst, push_inlined_call_frame_inst) in - &enclosing_inlined_frames - { - prefix.extend( - callsite_debug_src_loc_inst - .into_iter() - .chain([push_inlined_call_frame_inst]) - .map(|inst| instantiate_debuginfo(self, inst)), - ); - suffix.push(custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)); - } - prefix.extend(current_debug_src_loc_inst.map(|inst| instantiate_debuginfo(self, inst))); - - if include_callee_frame { - prefix.push(custom_inst_to_inst( - self, - CustomInst::PushInlinedCallFrame { - callee_name: Operand::IdRef(callee_name_id), - }, - )); - suffix.push(custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)); - } - (prefix, suffix) + ( + (call_debug_src_loc_inst.map(|inst| instantiate_debuginfo(self, inst))) + .into_iter() + .chain([custom_inst_to_inst( + self, + CustomInst::PushInlinedCallFrame { + callee_name: Operand::IdRef(callee_name_id), + }, + )]), + [custom_inst_to_inst(self, CustomInst::PopInlinedCallFrame)], + ) }; let mut blocks = callee.blocks.clone(); @@ -1029,7 +1021,7 @@ impl Inliner<'_, '_> { // HACK(eddyb) avoid adding debuginfo to otherwise-empty blocks. if block.instructions.len() > num_phis { - let (debuginfo_prefix, debuginfo_suffix) = mk_debuginfo_prefix_and_suffix(true); + let (debuginfo_prefix, debuginfo_suffix) = mk_debuginfo_prefix_and_suffix(); // Insert the prefix debuginfo instructions after `OpPhi`s, // which sadly can't be covered by them. 
block @@ -1043,13 +1035,7 @@ impl Inliner<'_, '_> { block.instructions.push(terminator); } - let (caller_restore_debuginfo_after_call, calleer_reset_debuginfo_before_call) = - mk_debuginfo_prefix_and_suffix(false); - ( - blocks, - calleer_reset_debuginfo_before_call, - caller_restore_debuginfo_after_call, - ) + blocks } fn insert_opvariables(&self, block: &mut Block, insts: impl IntoIterator) { diff --git a/crates/rustc_codegen_spirv/src/linker/ipo.rs b/crates/rustc_codegen_spirv/src/linker/ipo.rs index 475f5c50c1..96c7bbe6ae 100644 --- a/crates/rustc_codegen_spirv/src/linker/ipo.rs +++ b/crates/rustc_codegen_spirv/src/linker/ipo.rs @@ -4,7 +4,7 @@ use indexmap::IndexSet; use rspirv::dr::Module; -use rspirv::spirv::Op; +use rspirv::spirv::{Op, Word}; use rustc_data_structures::fx::FxHashMap; // FIXME(eddyb) use newtyped indices and `IndexVec`. @@ -19,6 +19,9 @@ pub struct CallGraph { impl CallGraph { pub fn collect(module: &Module) -> Self { + Self::collect_with_func_id_to_idx(module).0 + } + pub fn collect_with_func_id_to_idx(module: &Module) -> (Self, FxHashMap) { let func_id_to_idx: FxHashMap<_, _> = module .functions .iter() @@ -51,10 +54,13 @@ impl CallGraph { .collect() }) .collect(); - Self { - entry_points, - callees, - } + ( + Self { + entry_points, + callees, + }, + func_id_to_idx, + ) } /// Order functions using a post-order traversal, i.e. callees before callers. diff --git a/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr b/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr index 5b2ad4eb9a..a3bee642b1 100644 --- a/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr +++ b/tests/compiletests/ui/dis/panic_builtin_bounds_check.stderr @@ -28,14 +28,13 @@ OpLine %5 32 4 %21 = OpVariable %12 Function OpLine %5 32 23 %22 = OpCompositeConstruct %6 %13 %14 %15 %16 -OpLine %5 32 4 +OpLine %5 27 4 %23 = OpCompositeExtract %9 %22 0 %24 = OpCompositeExtract %9 %22 1 %25 = OpCompositeExtract %9 %22 2 %26 = OpCompositeExtract %9 %22 3 %27 = OpCompositeConstruct %11 %23 %24 %25 %26 OpStore %21 %27 -OpLine %5 27 4 %28 = OpULessThan %17 %18 %10 OpNoLine OpSelectionMerge %29 None diff --git a/tests/compiletests/ui/lang/consts/nested-ref.stderr b/tests/compiletests/ui/lang/consts/nested-ref.stderr index 692ea48ed7..805ec9b607 100644 --- a/tests/compiletests/ui/lang/consts/nested-ref.stderr +++ b/tests/compiletests/ui/lang/consts/nested-ref.stderr @@ -1,6 +1,20 @@ -warning: `#[inline(never)]` function `nested_ref::deep_load` needs to be inlined because it has illegal argument or return types +warning: `#[inline(never)]` function `nested_ref::deep_load` has been inlined + --> $DIR/nested-ref.rs:12:4 + | +LL | fn deep_load(r: &'static &'static u32) -> u32 { + | ^^^^^^^^^ + | + = note: inlining was required due to illegal parameter type + = note: called from `nested_ref::main` -warning: `#[inline(never)]` function `nested_ref::deep_transpose` needs to be inlined because it has illegal argument or return types +warning: `#[inline(never)]` function `nested_ref::deep_transpose` has been inlined + --> $DIR/nested-ref.rs:19:4 + | +LL | fn deep_transpose(r: &'static &'static Mat2) -> Mat2 { + | ^^^^^^^^^^^^^^ + | + = note: inlining was required due to illegal parameter type + = note: called from `nested_ref::main` warning: 2 warnings emitted diff --git a/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr b/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr index 35f37f16a8..4a41bfb462 100644 --- 
a/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr +++ b/tests/compiletests/ui/lang/core/ref/member_ref_arg-broken.stderr @@ -1,6 +1,38 @@ -warning: `#[inline(never)]` function `member_ref_arg_broken::h` needs to be inlined because it has illegal argument or return types +warning: `#[inline(never)]` function `member_ref_arg_broken::f` has been inlined + --> $DIR/member_ref_arg-broken.rs:20:4 + | +LL | fn f(x: &u32) -> u32 { + | ^ + | + = note: inlining was required due to illegal (pointer) argument + = note: called from `member_ref_arg_broken::main` + +warning: `#[inline(never)]` function `member_ref_arg_broken::g` has been inlined + --> $DIR/member_ref_arg-broken.rs:25:4 + | +LL | fn g(xy: (&u32, &u32)) -> (u32, u32) { + | ^ + | + = note: inlining was required due to illegal (pointer) argument + = note: called from `member_ref_arg_broken::main` -warning: `#[inline(never)]` function `member_ref_arg_broken::h_newtyped` needs to be inlined because it has illegal argument or return types +warning: `#[inline(never)]` function `member_ref_arg_broken::h` has been inlined + --> $DIR/member_ref_arg-broken.rs:30:4 + | +LL | fn h(xyz: (&u32, &u32, &u32)) -> (u32, u32, u32) { + | ^ + | + = note: inlining was required due to illegal parameter type + = note: called from `member_ref_arg_broken::main` + +warning: `#[inline(never)]` function `member_ref_arg_broken::h_newtyped` has been inlined + --> $DIR/member_ref_arg-broken.rs:41:4 + | +LL | fn h_newtyped(xyz: ((&u32, &u32, &u32),)) -> (u32, u32, u32) { + | ^^^^^^^^^^ + | + = note: inlining was required due to illegal parameter type + = note: called from `member_ref_arg_broken::main` error: error:0:0 - OpLoad Pointer '$ID[%$ID]' is not a logical pointer. %39 = OpLoad %uint %38 @@ -8,5 +40,5 @@ error: error:0:0 - OpLoad Pointer '$ID[%$ID]' is not a logical pointer. 
= note: spirv-val failed = note: module `$TEST_BUILD_DIR/lang/core/ref/member_ref_arg-broken` -error: aborting due to 1 previous error; 2 warnings emitted +error: aborting due to 1 previous error; 4 warnings emitted diff --git a/tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr b/tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr new file mode 100644 index 0000000000..5ec9e260b4 --- /dev/null +++ b/tests/compiletests/ui/lang/core/ref/member_ref_arg.stderr @@ -0,0 +1,20 @@ +warning: `#[inline(never)]` function `member_ref_arg::f` has been inlined + --> $DIR/member_ref_arg.rs:14:4 + | +LL | fn f(x: &u32) {} + | ^ + | + = note: inlining was required due to illegal (pointer) argument + = note: called from `member_ref_arg::main` + +warning: `#[inline(never)]` function `member_ref_arg::g` has been inlined + --> $DIR/member_ref_arg.rs:17:4 + | +LL | fn g(xy: (&u32, &u32)) {} + | ^ + | + = note: inlining was required due to illegal (pointer) argument + = note: called from `member_ref_arg::main` + +warning: 2 warnings emitted + diff --git a/tests/compiletests/ui/lang/panic/track_caller.stderr b/tests/compiletests/ui/lang/panic/track_caller.stderr new file mode 100644 index 0000000000..b18b2fed0d --- /dev/null +++ b/tests/compiletests/ui/lang/panic/track_caller.stderr @@ -0,0 +1,11 @@ +warning: `#[inline(never)]` function `track_caller::track_caller_maybe_panic::panic_cold_explicit` has been inlined + --> $DIR/track_caller.rs:10:9 + | +LL | panic!(); + | ^^^^^^^^ + | + = note: inlining was required due to panicking + = note: called from `track_caller::track_caller_maybe_panic` + +warning: 1 warning emitted + From fe22a3b3a66c8367db901bc5893e689c250df14d Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 11/23] [2024] linker/mem2reg: index SPIR-V blocks by their label IDs for O(1) lookup. --- crates/rustc_codegen_spirv/src/linker/dce.rs | 11 ++- .../rustc_codegen_spirv/src/linker/mem2reg.rs | 91 +++++++++++-------- crates/rustc_codegen_spirv/src/linker/mod.rs | 7 +- 3 files changed, 63 insertions(+), 46 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/dce.rs b/crates/rustc_codegen_spirv/src/linker/dce.rs index 4be10c0d5b..20bcbd3310 100644 --- a/crates/rustc_codegen_spirv/src/linker/dce.rs +++ b/crates/rustc_codegen_spirv/src/linker/dce.rs @@ -7,9 +7,10 @@ //! *references* a rooted thing is also rooted, not the other way around - but that's the basic //! concept. 
-use rspirv::dr::{Function, Instruction, Module, Operand}; +use rspirv::dr::{Block, Function, Instruction, Module, Operand}; use rspirv::spirv::{Decoration, LinkageType, Op, StorageClass, Word}; -use rustc_data_structures::fx::FxIndexSet; +use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; +use std::hash::Hash; pub fn dce(module: &mut Module) { let mut rooted = collect_roots(module); @@ -137,11 +138,11 @@ fn kill_unrooted(module: &mut Module, rooted: &FxIndexSet) { } } -pub fn dce_phi(func: &mut Function) { +pub fn dce_phi(blocks: &mut FxIndexMap) { let mut used = FxIndexSet::default(); loop { let mut changed = false; - for inst in func.all_inst_iter() { + for inst in blocks.values().flat_map(|block| &block.instructions) { if inst.class.opcode != Op::Phi || used.contains(&inst.result_id.unwrap()) { for op in &inst.operands { if let Some(id) = op.id_ref_any() { @@ -154,7 +155,7 @@ pub fn dce_phi(func: &mut Function) { break; } } - for block in &mut func.blocks { + for block in blocks.values_mut() { block .instructions .retain(|inst| inst.class.opcode != Op::Phi || used.contains(&inst.result_id.unwrap())); diff --git a/crates/rustc_codegen_spirv/src/linker/mem2reg.rs b/crates/rustc_codegen_spirv/src/linker/mem2reg.rs index 16889cbcc3..df2434d63d 100644 --- a/crates/rustc_codegen_spirv/src/linker/mem2reg.rs +++ b/crates/rustc_codegen_spirv/src/linker/mem2reg.rs @@ -13,10 +13,14 @@ use super::simple_passes::outgoing_edges; use super::{apply_rewrite_rules, id}; use rspirv::dr::{Block, Function, Instruction, ModuleHeader, Operand}; use rspirv::spirv::{Op, Word}; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap}; use rustc_middle::bug; use std::collections::hash_map; +// HACK(eddyb) newtype instead of type alias to avoid mistakes. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +struct LabelId(Word); + pub fn mem2reg( header: &mut ModuleHeader, types_global_values: &mut Vec, @@ -24,8 +28,16 @@ pub fn mem2reg( constants: &FxHashMap, func: &mut Function, ) { - let reachable = compute_reachable(&func.blocks); - let preds = compute_preds(&func.blocks, &reachable); + // HACK(eddyb) this ad-hoc indexing might be useful elsewhere as well, but + // it's made completely irrelevant by SPIR-T so only applies to legacy code. 
+ let mut blocks: FxIndexMap<_, _> = func + .blocks + .iter_mut() + .map(|block| (LabelId(block.label_id().unwrap()), block)) + .collect(); + + let reachable = compute_reachable(&blocks); + let preds = compute_preds(&blocks, &reachable); let idom = compute_idom(&preds, &reachable); let dominance_frontier = compute_dominance_frontier(&preds, &idom); loop { @@ -34,31 +46,27 @@ pub fn mem2reg( types_global_values, pointer_to_pointee, constants, - &mut func.blocks, + &mut blocks, &dominance_frontier, ); if !changed { break; } // mem2reg produces minimal SSA form, not pruned, so DCE the dead ones - super::dce::dce_phi(func); + super::dce::dce_phi(&mut blocks); } } -fn label_to_index(blocks: &[Block], id: Word) -> usize { - blocks - .iter() - .position(|b| b.label_id().unwrap() == id) - .unwrap() -} - -fn compute_reachable(blocks: &[Block]) -> Vec { - fn recurse(blocks: &[Block], reachable: &mut [bool], block: usize) { +fn compute_reachable(blocks: &FxIndexMap) -> Vec { + fn recurse(blocks: &FxIndexMap, reachable: &mut [bool], block: usize) { if !reachable[block] { reachable[block] = true; - for dest_id in outgoing_edges(&blocks[block]) { - let dest_idx = label_to_index(blocks, dest_id); - recurse(blocks, reachable, dest_idx); + for dest_id in outgoing_edges(blocks[block]) { + recurse( + blocks, + reachable, + blocks.get_index_of(&LabelId(dest_id)).unwrap(), + ); } } } @@ -67,17 +75,19 @@ fn compute_reachable(blocks: &[Block]) -> Vec { reachable } -fn compute_preds(blocks: &[Block], reachable_blocks: &[bool]) -> Vec> { +fn compute_preds( + blocks: &FxIndexMap, + reachable_blocks: &[bool], +) -> Vec> { let mut result = vec![vec![]; blocks.len()]; // Do not count unreachable blocks as valid preds of blocks for (source_idx, source) in blocks - .iter() + .values() .enumerate() .filter(|&(b, _)| reachable_blocks[b]) { for dest_id in outgoing_edges(source) { - let dest_idx = label_to_index(blocks, dest_id); - result[dest_idx].push(source_idx); + result[blocks.get_index_of(&LabelId(dest_id)).unwrap()].push(source_idx); } } result @@ -161,7 +171,7 @@ fn insert_phis_all( types_global_values: &mut Vec, pointer_to_pointee: &FxHashMap, constants: &FxHashMap, - blocks: &mut [Block], + blocks: &mut FxIndexMap, dominance_frontier: &[FxHashSet], ) -> bool { let var_maps_and_types = blocks[0] @@ -198,7 +208,11 @@ fn insert_phis_all( rewrite_rules: FxHashMap::default(), }; renamer.rename(0, None); - apply_rewrite_rules(&renamer.rewrite_rules, blocks); + // FIXME(eddyb) shouldn't this full rescan of the function be done once? + apply_rewrite_rules( + &renamer.rewrite_rules, + blocks.values_mut().map(|block| &mut **block), + ); remove_nops(blocks); } remove_old_variables(blocks, &var_maps_and_types); @@ -216,7 +230,7 @@ struct VarInfo { fn collect_access_chains( pointer_to_pointee: &FxHashMap, constants: &FxHashMap, - blocks: &[Block], + blocks: &FxIndexMap, base_var: Word, base_var_ty: Word, ) -> Option> { @@ -249,7 +263,7 @@ fn collect_access_chains( // Loop in case a previous block references a later AccessChain loop { let mut changed = false; - for inst in blocks.iter().flat_map(|b| &b.instructions) { + for inst in blocks.values().flat_map(|b| &b.instructions) { for (index, op) in inst.operands.iter().enumerate() { if let Operand::IdRef(id) = op && variables.contains_key(id) @@ -303,10 +317,10 @@ fn collect_access_chains( // same var map (e.g. `s.x = s.y;`). 
fn split_copy_memory( header: &mut ModuleHeader, - blocks: &mut [Block], + blocks: &mut FxIndexMap, var_map: &FxHashMap, ) { - for block in blocks { + for block in blocks.values_mut() { let mut inst_index = 0; while inst_index < block.instructions.len() { let inst = &block.instructions[inst_index]; @@ -365,7 +379,7 @@ fn has_store(block: &Block, var_map: &FxHashMap) -> bool { } fn insert_phis( - blocks: &[Block], + blocks: &FxIndexMap, dominance_frontier: &[FxHashSet], var_map: &FxHashMap, ) -> FxHashSet { @@ -374,7 +388,7 @@ fn insert_phis( let mut ever_on_work_list = FxHashSet::default(); let mut work_list = Vec::new(); let mut blocks_with_phi = FxHashSet::default(); - for (block_idx, block) in blocks.iter().enumerate() { + for (block_idx, block) in blocks.values().enumerate() { if has_store(block, var_map) { ever_on_work_list.insert(block_idx); work_list.push(block_idx); @@ -419,10 +433,10 @@ fn top_stack_or_undef( } } -struct Renamer<'a> { +struct Renamer<'a, 'b> { header: &'a mut ModuleHeader, types_global_values: &'a mut Vec, - blocks: &'a mut [Block], + blocks: &'a mut FxIndexMap, blocks_with_phi: FxHashSet, base_var_type: Word, var_map: &'a FxHashMap, @@ -432,7 +446,7 @@ struct Renamer<'a> { rewrite_rules: FxHashMap, } -impl Renamer<'_> { +impl Renamer<'_, '_> { // Returns the phi definition. fn insert_phi_value(&mut self, block: usize, from_block: usize) -> Word { let from_block_label = self.blocks[from_block].label_id().unwrap(); @@ -554,9 +568,8 @@ impl Renamer<'_> { } } - for dest_id in outgoing_edges(&self.blocks[block]).collect::>() { - // TODO: Don't do this find - let dest_idx = label_to_index(self.blocks, dest_id); + for dest_id in outgoing_edges(self.blocks[block]).collect::>() { + let dest_idx = self.blocks.get_index_of(&LabelId(dest_id)).unwrap(); self.rename(dest_idx, Some(block)); } @@ -566,8 +579,8 @@ impl Renamer<'_> { } } -fn remove_nops(blocks: &mut [Block]) { - for block in blocks { +fn remove_nops(blocks: &mut FxIndexMap) { + for block in blocks.values_mut() { block .instructions .retain(|inst| inst.class.opcode != Op::Nop); @@ -575,7 +588,7 @@ fn remove_nops(blocks: &mut [Block]) { } fn remove_old_variables( - blocks: &mut [Block], + blocks: &mut FxIndexMap, var_maps_and_types: &[(FxHashMap, u32)], ) { blocks[0].instructions.retain(|inst| { @@ -586,7 +599,7 @@ fn remove_old_variables( .all(|(var_map, _)| !var_map.contains_key(&result_id)) } }); - for block in blocks { + for block in blocks.values_mut() { block.instructions.retain(|inst| { !matches!(inst.class.opcode, Op::AccessChain | Op::InBoundsAccessChain) || inst.operands.iter().all(|op| { diff --git a/crates/rustc_codegen_spirv/src/linker/mod.rs b/crates/rustc_codegen_spirv/src/linker/mod.rs index e6c5bf3552..e0d612cab2 100644 --- a/crates/rustc_codegen_spirv/src/linker/mod.rs +++ b/crates/rustc_codegen_spirv/src/linker/mod.rs @@ -91,9 +91,12 @@ fn id(header: &mut ModuleHeader) -> Word { result } -fn apply_rewrite_rules(rewrite_rules: &FxHashMap, blocks: &mut [Block]) { +fn apply_rewrite_rules<'a>( + rewrite_rules: &FxHashMap, + blocks: impl IntoIterator, +) { let all_ids_mut = blocks - .iter_mut() + .into_iter() .flat_map(|b| b.label.iter_mut().chain(b.instructions.iter_mut())) .flat_map(|inst| { inst.result_id From 5db1f6bdd6780b473de406aa6b8028d8bcc7d266 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 12/23] [2024] linker/inline: use `OpPhi` instead of `OpVariable` for return values. 
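
Compared to the previous scheme, every inlined `OpReturnValue %v` (in some
callee block `%bb`) now contributes a `(%v, %bb)` operand pair to a single
`OpPhi` placed in the post-call block, instead of an `OpStore` into a
function-local `OpVariable` that the post-call block then has to `OpLoad`
(the single-return case is further special-cased to forward the returned
value directly, avoiding an extraneous one-pair `OpPhi`).

A minimal illustrative sketch (not part of the patch; the helper name and
signature are hypothetical) of how such a merged call-result `OpPhi` could
be assembled with rspirv's `dr` API:

    use rspirv::dr::{Instruction, Operand};
    use rspirv::spirv::{Op, Word};

    /// Build `%call_result = OpPhi %result_ty (%v0, %bb0) (%v1, %bb1) ...`,
    /// where each `(%vN, %bbN)` pair comes from an inlined `OpReturnValue %vN`
    /// in the callee block whose label ID is `%bbN`.
    fn call_result_phi(
        result_ty: Word,
        call_result_id: Word,
        returns: &[(Word, Word)],
    ) -> Instruction {
        Instruction::new(
            Op::Phi,
            Some(result_ty),
            Some(call_result_id),
            returns
                .iter()
                .flat_map(|&(value, block)| [Operand::IdRef(value), Operand::IdRef(block)])
                .collect(),
        )
    }
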
--- .../rustc_codegen_spirv/src/linker/inline.rs | 136 +++++++++--------- .../ui/dis/complex_image_sample_inst.stderr | 21 ++- 2 files changed, 77 insertions(+), 80 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index e2aac8c548..0ab19bd40f 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -94,7 +94,6 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { header, debug_string_source: &mut module.debug_string_source, annotations: &mut module.annotations, - types_global_values: &mut module.types_global_values, legal_globals, @@ -493,7 +492,6 @@ struct Inliner<'a, 'b> { header: &'b mut ModuleHeader, debug_string_source: &'b mut Vec, annotations: &'b mut Vec, - types_global_values: &'b mut Vec, legal_globals: FxHashMap, functions_that_may_abort: FxHashSet, @@ -523,29 +521,6 @@ impl Inliner<'_, '_> { } } - fn ptr_ty(&mut self, pointee: Word) -> Word { - // TODO: This is horribly slow, fix this - let existing = self.types_global_values.iter().find(|inst| { - inst.class.opcode == Op::TypePointer - && inst.operands[0].unwrap_storage_class() == StorageClass::Function - && inst.operands[1].unwrap_id_ref() == pointee - }); - if let Some(existing) = existing { - return existing.result_id.unwrap(); - } - let inst_id = self.id(); - self.types_global_values.push(Instruction::new( - Op::TypePointer, - None, - Some(inst_id), - vec![ - Operand::StorageClass(StorageClass::Function), - Operand::IdRef(pointee), - ], - )); - inst_id - } - fn inline_fn( &mut self, function: &mut Function, @@ -622,15 +597,19 @@ impl Inliner<'_, '_> { .insert(caller.def_id().unwrap()); } - let call_result_type = { + let mut maybe_call_result_phi = { let ty = call_inst.result_type.unwrap(); if ty == self.op_type_void_id { None } else { - Some(ty) + Some(Instruction::new( + Op::Phi, + Some(ty), + Some(call_inst.result_id.unwrap()), + vec![], + )) } }; - let call_result_id = call_inst.result_id.unwrap(); // Get the debug "source location" instruction that applies to the call. let custom_ext_inst_set_import = self.custom_ext_inst_set_import; @@ -667,17 +646,12 @@ impl Inliner<'_, '_> { }); let mut rewrite_rules = callee_parameters.zip(call_arguments).collect(); - let return_variable = if call_result_type.is_some() { - Some(self.id()) - } else { - None - }; let return_jump = self.id(); // Rewrite OpReturns of the callee. let mut inlined_callee_blocks = self.get_inlined_blocks( callee, call_debug_src_loc_inst, - return_variable, + maybe_call_result_phi.as_mut(), return_jump, ); // Clone the IDs of the callee, because otherwise they'd be defined multiple times if the @@ -686,6 +660,55 @@ impl Inliner<'_, '_> { apply_rewrite_rules(&rewrite_rules, &mut inlined_callee_blocks); self.apply_rewrite_for_decorations(&rewrite_rules); + if let Some(call_result_phi) = &mut maybe_call_result_phi { + // HACK(eddyb) new IDs should be generated earlier, to avoid pushing + // callee IDs to `call_result_phi.operands` only to rewrite them here. + for op in &mut call_result_phi.operands { + if let Some(id) = op.id_ref_any_mut() + && let Some(&rewrite) = rewrite_rules.get(id) + { + *id = rewrite; + } + } + + // HACK(eddyb) this special-casing of the single-return case is + // really necessary for passes like `mem2reg` which are not capable + // of skipping through the extraneous `OpPhi`s on their own. + if let [returned_value, _return_block] = &call_result_phi.operands[..] 
{ + let call_result_id = call_result_phi.result_id.unwrap(); + let returned_value_id = returned_value.unwrap_id_ref(); + + maybe_call_result_phi = None; + + // HACK(eddyb) this is a conservative approximation of all the + // instructions that could potentially reference the call result. + let reaching_insts = { + let (pre_call_blocks, call_and_post_call_blocks) = + caller.blocks.split_at_mut(block_idx); + (pre_call_blocks.iter_mut().flat_map(|block| { + block + .instructions + .iter_mut() + .take_while(|inst| inst.class.opcode == Op::Phi) + })) + .chain( + call_and_post_call_blocks + .iter_mut() + .flat_map(|block| &mut block.instructions), + ) + }; + for reaching_inst in reaching_insts { + for op in &mut reaching_inst.operands { + if let Some(id) = op.id_ref_any_mut() + && *id == call_result_id + { + *id = returned_value_id; + } + } + } + } + } + // Split the block containing the `OpFunctionCall` into pre-call vs post-call. let pre_call_block_idx = block_idx; #[expect(unused)] @@ -701,18 +724,6 @@ impl Inliner<'_, '_> { .unwrap(); assert!(call.class.opcode == Op::FunctionCall); - if let Some(call_result_type) = call_result_type { - // Generate the storage space for the return value: Do this *after* the split above, - // because if block_idx=0, inserting a variable here shifts call_index. - let ret_var_inst = Instruction::new( - Op::Variable, - Some(self.ptr_ty(call_result_type)), - Some(return_variable.unwrap()), - vec![Operand::StorageClass(StorageClass::Function)], - ); - self.insert_opvariables(&mut caller.blocks[0], [ret_var_inst]); - } - // Insert non-entry inlined callee blocks just after the pre-call block. let non_entry_inlined_callee_blocks = inlined_callee_blocks.drain(1..); let num_non_entry_inlined_callee_blocks = non_entry_inlined_callee_blocks.len(); @@ -721,18 +732,9 @@ impl Inliner<'_, '_> { non_entry_inlined_callee_blocks, ); - if let Some(call_result_type) = call_result_type { - // Add the load of the result value after the inlined function. Note there's guaranteed no - // OpPhi instructions since we just split this block. - post_call_block_insts.insert( - 0, - Instruction::new( - Op::Load, - Some(call_result_type), - Some(call_result_id), - vec![Operand::IdRef(return_variable.unwrap())], - ), - ); + if let Some(call_result_phi) = maybe_call_result_phi { + // Add the `OpPhi` for the call result value, after the inlined function. + post_call_block_insts.insert(0, call_result_phi); } // Insert the post-call block, after all the inlined callee blocks. 
@@ -899,7 +901,7 @@ impl Inliner<'_, '_> { &mut self, callee: &Function, call_debug_src_loc_inst: Option<&Instruction>, - return_variable: Option, + mut maybe_call_result_phi: Option<&mut Instruction>, return_jump: Word, ) -> Vec { let Self { @@ -997,17 +999,13 @@ impl Inliner<'_, '_> { if let Op::Return | Op::ReturnValue = terminator.class.opcode { if Op::ReturnValue == terminator.class.opcode { let return_value = terminator.operands[0].id_ref_any().unwrap(); - block.instructions.push(Instruction::new( - Op::Store, - None, - None, - vec![ - Operand::IdRef(return_variable.unwrap()), - Operand::IdRef(return_value), - ], - )); + let call_result_phi = maybe_call_result_phi.as_deref_mut().unwrap(); + call_result_phi.operands.extend([ + Operand::IdRef(return_value), + Operand::IdRef(block.label_id().unwrap()), + ]); } else { - assert!(return_variable.is_none()); + assert!(maybe_call_result_phi.is_none()); } terminator = Instruction::new(Op::Branch, None, None, vec![Operand::IdRef(return_jump)]); diff --git a/tests/compiletests/ui/dis/complex_image_sample_inst.stderr b/tests/compiletests/ui/dis/complex_image_sample_inst.stderr index 3d62336afd..b99b63a736 100644 --- a/tests/compiletests/ui/dis/complex_image_sample_inst.stderr +++ b/tests/compiletests/ui/dis/complex_image_sample_inst.stderr @@ -3,19 +3,18 @@ %5 = OpFunctionParameter %6 %7 = OpFunctionParameter %6 %8 = OpLabel -OpLine %9 10 25 -%10 = OpCompositeExtract %11 %5 0 -%12 = OpCompositeExtract %11 %5 1 -%13 = OpCompositeConstruct %6 %10 %12 -%14 = OpCompositeExtract %11 %7 0 -%15 = OpCompositeExtract %11 %7 1 -%16 = OpCompositeConstruct %6 %14 %15 -OpLine %9 29 13 +%9 = OpCompositeExtract %10 %5 0 +%11 = OpCompositeExtract %10 %5 1 +%12 = OpCompositeConstruct %6 %9 %11 +%13 = OpCompositeExtract %10 %7 0 +%14 = OpCompositeExtract %10 %7 1 +%15 = OpCompositeConstruct %6 %13 %14 +OpLine %16 29 13 %17 = OpAccessChain %18 %19 %20 -OpLine %9 30 13 +OpLine %16 30 13 %21 = OpLoad %22 %17 -OpLine %9 34 13 -%23 = OpImageSampleProjExplicitLod %2 %21 %4 Grad %13 %16 +OpLine %16 34 13 +%23 = OpImageSampleProjExplicitLod %2 %21 %4 Grad %12 %15 OpNoLine OpReturnValue %23 OpFunctionEnd From 1e4447426a486d385524049fba91128a9960c1f9 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 13/23] linker/inline: fix typos in comments. --- crates/rustc_codegen_spirv/src/linker/inline.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 0ab19bd40f..a6abc0f2ee 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -133,7 +133,7 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { .map(Ok) .collect(); - // Inline functions in post-order (aka inside-out aka bottom-out) - that is, + // Inline functions in post-order (aka inside-out aka bottom-up) - that is, // callees are processed before their callers, to avoid duplicating work. for func_idx in call_graph.post_order() { let mut function = mem::replace(&mut functions[func_idx], Err(FuncIsBeingInlined)).unwrap(); @@ -411,7 +411,7 @@ fn should_inline( } // If the call isn't passing a legal pointer argument (a "memory object", - // i.e. an `OpVariable` or one of the caller's `OpFunctionParameters), + // i.e. an `OpVariable` or one of the caller's `OpFunctionParameter`s), // then inlining is required to have a chance at producing legal SPIR-V. 
// // FIXME(eddyb) rewriting away the pointer could be another alternative. @@ -826,7 +826,7 @@ impl Inliner<'_, '_> { } // `vars_and_debuginfo_range.end` indicates where `OpVariable`s - // end and other instructions start (module debuginfo), but to + // end and other instructions start (modulo debuginfo), but to // split the block in two, both sides of the "cut" need "repair": // - the variables are missing "inlined call frames" pops, that // may happen later in the block, and have to be synthesized From c08b57951e92b4886a0a997c2167791ba4bbdf43 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 14/23] [TODO(eddyb) this is probably sound but requires looping for rewrites] [2024] WIP: mem2reg unsound changes? --- .../rustc_codegen_spirv/src/linker/mem2reg.rs | 17 +++++++++-------- crates/rustc_codegen_spirv/src/linker/mod.rs | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/mem2reg.rs b/crates/rustc_codegen_spirv/src/linker/mem2reg.rs index df2434d63d..c6fd8c084e 100644 --- a/crates/rustc_codegen_spirv/src/linker/mem2reg.rs +++ b/crates/rustc_codegen_spirv/src/linker/mem2reg.rs @@ -193,6 +193,8 @@ fn insert_phis_all( for (var_map, _) in &var_maps_and_types { split_copy_memory(header, blocks, var_map); } + + let mut rewrite_rules = FxHashMap::default(); for &(ref var_map, base_var_type) in &var_maps_and_types { let blocks_with_phi = insert_phis(blocks, dominance_frontier, var_map); let mut renamer = Renamer { @@ -205,16 +207,15 @@ fn insert_phis_all( phi_defs: FxHashSet::default(), visited: FxHashSet::default(), stack: Vec::new(), - rewrite_rules: FxHashMap::default(), + rewrite_rules: &mut rewrite_rules, }; renamer.rename(0, None); - // FIXME(eddyb) shouldn't this full rescan of the function be done once? - apply_rewrite_rules( - &renamer.rewrite_rules, - blocks.values_mut().map(|block| &mut **block), - ); - remove_nops(blocks); } + apply_rewrite_rules( + &rewrite_rules, + blocks.values_mut().map(|block| &mut **block), + ); + remove_nops(blocks); remove_old_variables(blocks, &var_maps_and_types); true } @@ -443,7 +444,7 @@ struct Renamer<'a, 'b> { phi_defs: FxHashSet, visited: FxHashSet, stack: Vec, - rewrite_rules: FxHashMap, + rewrite_rules: &'a mut FxHashMap, } impl Renamer<'_, '_> { diff --git a/crates/rustc_codegen_spirv/src/linker/mod.rs b/crates/rustc_codegen_spirv/src/linker/mod.rs index e0d612cab2..487f2eae78 100644 --- a/crates/rustc_codegen_spirv/src/linker/mod.rs +++ b/crates/rustc_codegen_spirv/src/linker/mod.rs @@ -109,7 +109,7 @@ fn apply_rewrite_rules<'a>( ) }); for id in all_ids_mut { - if let Some(&rewrite) = rewrite_rules.get(id) { + while let Some(&rewrite) = rewrite_rules.get(id) { *id = rewrite; } } From 4d10050f7ed5e3228dcc64d77a3baa91fb0497a5 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 15/23] linker/inline: also run `remove_duplicate_debuginfo` on every fully-inlined function. 
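
Inlining instantiates the same debuginfo prefix/suffix (`SetDebugSrcLoc`,
`{Push,Pop}InlinedCallFrame`, etc.) into every inlined block, which tends to
leave adjacent redundant instructions behind; deduplicating right after each
function is fully inlined keeps that from piling up until the later
module-wide pass runs. To allow that, the per-function logic is factored out
into a reusable `DebuginfoDeduplicator`, parameterized only by the custom
extended-instruction-set import ID.

Rough usage sketch (illustrative, mirroring the new call-site inside the
inliner, so not self-contained outside the linker module):

    let deduper = super::duplicates::DebuginfoDeduplicator {
        custom_ext_inst_set_import,
    };
    deduper.remove_duplicate_debuginfo_in_function(&mut function);
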
--- .../rustc_codegen_spirv/src/linker/duplicates.rs | 16 +++++++++++++++- crates/rustc_codegen_spirv/src/linker/inline.rs | 6 ++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/crates/rustc_codegen_spirv/src/linker/duplicates.rs b/crates/rustc_codegen_spirv/src/linker/duplicates.rs index 76f85a7713..70554caddd 100644 --- a/crates/rustc_codegen_spirv/src/linker/duplicates.rs +++ b/crates/rustc_codegen_spirv/src/linker/duplicates.rs @@ -283,7 +283,20 @@ pub fn remove_duplicate_debuginfo(module: &mut Module) { }) .map(|inst| inst.result_id.unwrap()); + let deduper = DebuginfoDeduplicator { + custom_ext_inst_set_import, + }; for func in &mut module.functions { + deduper.remove_duplicate_debuginfo_in_function(func); + } +} + +pub struct DebuginfoDeduplicator { + pub custom_ext_inst_set_import: Option, +} + +impl DebuginfoDeduplicator { + pub fn remove_duplicate_debuginfo_in_function(&self, func: &mut rspirv::dr::Function) { for block in &mut func.blocks { // Ignore the terminator, it's effectively "outside" debuginfo. let (_, insts) = block.instructions.split_last_mut().unwrap(); @@ -339,7 +352,8 @@ pub fn remove_duplicate_debuginfo(module: &mut Module) { let inst = &insts[inst_idx]; let custom_op = match inst.class.opcode { Op::ExtInst - if Some(inst.operands[0].unwrap_id_ref()) == custom_ext_inst_set_import => + if Some(inst.operands[0].unwrap_id_ref()) + == self.custom_ext_inst_set_import => { Some(CustomOp::decode_from_ext_inst(inst)) } diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index a6abc0f2ee..46aa3c95f8 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -139,6 +139,12 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { let mut function = mem::replace(&mut functions[func_idx], Err(FuncIsBeingInlined)).unwrap(); inliner.inline_fn(&mut function, &functions); fuse_trivial_branches(&mut function); + + super::duplicates::DebuginfoDeduplicator { + custom_ext_inst_set_import, + } + .remove_duplicate_debuginfo_in_function(&mut function); + functions[func_idx] = Ok(function); } From 5040aa28c8baaecb517a4477b8a3636b510f8270 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 16/23] linker/inline: also run `mem2reg` on every fully-inlined function. 
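
With inlining proceeding bottom-up, each function can be cleaned up as soon
as it is fully inlined, so callers later splice in an already-optimized body.
The module-level inputs `mem2reg` needs (the `OpTypePointer` result ID to
pointee type map, and the known 32-bit unsigned integer constants) are
collected once from `types_global_values`, outside the per-function loop.

Sketch of the per-function step (illustrative, mirroring the hunk below;
note that `mem2reg` requires the blocks to be in reverse post-order first):

    super::simple_passes::block_ordering_pass(&mut function);
    super::mem2reg::mem2reg(
        inliner.header,
        &mut module.types_global_values,
        &mem2reg_pointer_to_pointee,
        &mem2reg_constants,
        &mut function,
    );
    super::destructure_composites::destructure_composites(&mut function);
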
--- .../rustc_codegen_spirv/src/linker/inline.rs | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 46aa3c95f8..8e16a9555d 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -133,6 +133,32 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { .map(Ok) .collect(); + let mut mem2reg_pointer_to_pointee = FxHashMap::default(); + let mut mem2reg_constants = FxHashMap::default(); + { + let mut u32 = None; + for inst in &module.types_global_values { + match inst.class.opcode { + Op::TypePointer => { + mem2reg_pointer_to_pointee + .insert(inst.result_id.unwrap(), inst.operands[1].unwrap_id_ref()); + } + Op::TypeInt + if inst.operands[0].unwrap_literal_bit32() == 32 + && inst.operands[1].unwrap_literal_bit32() == 0 => + { + assert!(u32.is_none()); + u32 = Some(inst.result_id.unwrap()); + } + Op::Constant if u32.is_some() && inst.result_type == u32 => { + let value = inst.operands[0].unwrap_literal_bit32(); + mem2reg_constants.insert(inst.result_id.unwrap(), value); + } + _ => {} + } + } + } + // Inline functions in post-order (aka inside-out aka bottom-up) - that is, // callees are processed before their callers, to avoid duplicating work. for func_idx in call_graph.post_order() { @@ -145,6 +171,19 @@ pub fn inline(sess: &Session, module: &mut Module) -> super::Result<()> { } .remove_duplicate_debuginfo_in_function(&mut function); + { + super::simple_passes::block_ordering_pass(&mut function); + // Note: mem2reg requires functions to be in RPO order (i.e. block_ordering_pass) + super::mem2reg::mem2reg( + inliner.header, + &mut module.types_global_values, + &mem2reg_pointer_to_pointee, + &mem2reg_constants, + &mut function, + ); + super::destructure_composites::destructure_composites(&mut function); + } + functions[func_idx] = Ok(function); } From d02aa41e48951b84ca9103f76311cb5448912c63 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 17/23] (placeholder) From 6f54c865f02594b7cbd6983daa1bff9381d148a4 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 18/23] linker/inline: don't assume `OpFunctionParameter`s are valid pointers (as it'd require outside-in inlining). --- crates/rustc_codegen_spirv/src/linker/inline.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 8e16a9555d..837261323d 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -473,6 +473,7 @@ fn should_inline( .caller .parameters .iter() + .filter(|_| false) .chain( call_site.caller.blocks[0] .instructions From 696080eed32a39f767a6995073df576728f34273 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 19/23] [TODO(eddyb) recursion needs this!!!] [HACK(eddyb) this works now but requires the preceding commit] linker/inline: don't rescan inlined callee blocks. 
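
`inline_block` now reports, on success, the index of the first caller block
that follows all of the just-inlined callee blocks, so the outer scan can
resume there instead of re-walking the freshly inserted blocks (the old
strategy of repeating processing on the same block was quadratic). As the
bracketed notes above suggest, skipping the rescan is only safe together
with the preceding commit, which keeps inlining decisions from depending on
the caller's own `OpFunctionParameter`s.

Sketch of the new scan loop (illustrative, mirroring the hunk below; the
comments are added here for explanation):

    let mut block_idx = 0;
    while block_idx < function.blocks.len() {
        match self.inline_block(function, block_idx, functions) {
            // Jump past the inlined callee's blocks.
            Some(post_call_block_idx) => block_idx = post_call_block_idx,
            // Nothing was inlined here, move on to the next block.
            None => block_idx += 1,
        }
    }
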
--- .../rustc_codegen_spirv/src/linker/inline.rs | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 837261323d..072b3d8b92 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -574,22 +574,22 @@ impl Inliner<'_, '_> { ) { let mut block_idx = 0; while block_idx < function.blocks.len() { - // If we successfully inlined a block, then repeat processing on the same block, in - // case the newly inlined block has more inlined calls. - // TODO: This is quadratic - if !self.inline_block(function, block_idx, functions) { - // TODO(eddyb) skip past the inlined callee without rescanning it. - block_idx += 1; + match self.inline_block(function, block_idx, functions) { + Some(post_call_block_idx) => block_idx = post_call_block_idx, + None => block_idx += 1, } } } + // HACK(eddyb) returns `Some(post_call_block_idx)` in case of success, where + // `post_call_block_idx` is the index of the first block of the `caller`, + // that follows all the inlined callee's blocks. fn inline_block( &mut self, caller: &mut Function, block_idx: usize, functions: &[Result], - ) -> bool { + ) -> Option { // Find the first inlined OpFunctionCall let call = caller.blocks[block_idx] .instructions @@ -629,10 +629,7 @@ impl Inliner<'_, '_> { } } }); - let (call_index, call_inst, callee) = match call { - None => return false, - Some(call) => call, - }; + let (call_index, call_inst, callee) = call?; // Propagate "may abort" from callee to caller (i.e. as aborts get inlined). if self @@ -784,8 +781,8 @@ impl Inliner<'_, '_> { } // Insert the post-call block, after all the inlined callee blocks. + let post_call_block_idx = pre_call_block_idx + num_non_entry_inlined_callee_blocks + 1; { - let post_call_block_idx = pre_call_block_idx + num_non_entry_inlined_callee_blocks + 1; let post_call_block = Block { label: Some(Instruction::new(Op::Label, None, Some(return_jump), vec![])), instructions: post_call_block_insts, @@ -928,7 +925,7 @@ impl Inliner<'_, '_> { ); } - true + Some(post_call_block_idx) } fn add_clone_id_rules(&mut self, rewrite_rules: &mut FxHashMap, blocks: &[Block]) { From 0546ebd00c21ed3291d8d36fd110aca80bdf3972 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 20/23] Replace `SpirvValueKind::IllegalTypeUsed` with mere `undef`. 
--- .../src/builder/builder_methods.rs | 5 +---- .../src/builder/byte_addressable_buffer.rs | 10 +++------- crates/rustc_codegen_spirv/src/builder_spirv.rs | 17 ----------------- 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index 4d5b3b9390..1311a9cc09 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -3411,10 +3411,7 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { if buffer_store_intrinsic { self.codegen_buffer_store_intrinsic(fn_abi, args); let void_ty = SpirvType::Void.def(rustc_span::DUMMY_SP, self); - return SpirvValue { - kind: SpirvValueKind::IllegalTypeUsed(void_ty), - ty: void_ty, - }; + return self.undef(void_ty); } if let Some((source_ty, target_ty)) = from_trait_impl { diff --git a/crates/rustc_codegen_spirv/src/builder/byte_addressable_buffer.rs b/crates/rustc_codegen_spirv/src/builder/byte_addressable_buffer.rs index 00ffaf661b..c0269b4bbf 100644 --- a/crates/rustc_codegen_spirv/src/builder/byte_addressable_buffer.rs +++ b/crates/rustc_codegen_spirv/src/builder/byte_addressable_buffer.rs @@ -2,7 +2,7 @@ use crate::maybe_pqp_cg_ssa as rustc_codegen_ssa; use super::Builder; -use crate::builder_spirv::{SpirvValue, SpirvValueExt, SpirvValueKind}; +use crate::builder_spirv::{SpirvValue, SpirvValueExt}; use crate::spirv_type::SpirvType; use rspirv::spirv::{Decoration, Word}; use rustc_abi::{Align, Size}; @@ -188,12 +188,8 @@ impl<'a, 'tcx> Builder<'a, 'tcx> { ) -> SpirvValue { let pass_mode = &fn_abi.unwrap().ret.mode; match pass_mode { - PassMode::Ignore => { - return SpirvValue { - kind: SpirvValueKind::IllegalTypeUsed(result_type), - ty: result_type, - }; - } + PassMode::Ignore => return self.undef(result_type), + // PassMode::Pair is identical to PassMode::Direct - it's returned as a struct PassMode::Direct(_) | PassMode::Pair(_, _) => (), PassMode::Cast { .. } => { diff --git a/crates/rustc_codegen_spirv/src/builder_spirv.rs b/crates/rustc_codegen_spirv/src/builder_spirv.rs index e05b433057..fc735a4178 100644 --- a/crates/rustc_codegen_spirv/src/builder_spirv.rs +++ b/crates/rustc_codegen_spirv/src/builder_spirv.rs @@ -40,13 +40,6 @@ pub enum SpirvValueKind { /// of such constants, instead of where they're generated (and cached). IllegalConst(Word), - /// This can only happen in one specific case - which is as a result of - /// `codegen_buffer_store_intrinsic`, that function is supposed to return - /// `OpTypeVoid`, however because it gets inline by the compiler it can't. - /// Instead we return this, and trigger an error if we ever end up using the - /// result of this function call (which we can't). - IllegalTypeUsed(Word), - // FIXME(eddyb) this shouldn't be needed, but `rustc_codegen_ssa` still relies // on converting `Function`s to `Value`s even for direct calls, the `Builder` // should just have direct and indirect `call` variants (or a `Callee` enum). @@ -166,16 +159,6 @@ impl SpirvValue { id } - SpirvValueKind::IllegalTypeUsed(id) => { - cx.tcx - .dcx() - .struct_span_err(span, "Can't use type as a value") - .with_note(format!("Type: *{}", cx.debug_type(id))) - .emit(); - - id - } - SpirvValueKind::FnAddr { .. 
} => { cx.builder .const_to_id From a9d449494cb8726c9c1939718da9a68f12bd7a3f Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 21/23] Always register zombie messages, only at most defer their `Span`s. --- .../src/builder/builder_methods.rs | 16 ++- .../rustc_codegen_spirv/src/builder_spirv.rs | 98 +++++++++---------- .../rustc_codegen_spirv/src/codegen_cx/mod.rs | 17 +++- .../ui/dis/ptr_copy.normal.stderr | 31 +++++- 4 files changed, 103 insertions(+), 59 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index 1311a9cc09..d20cc45b5f 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -2507,11 +2507,25 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { ); // Defer the cast so that it has a chance to be avoided. let original_ptr = ptr.def(self); + let bitcast_result_id = self.emit().bitcast(dest_ty, None, original_ptr).unwrap(); + + self.zombie( + bitcast_result_id, + &format!( + "cannot cast between pointer types\ + \nfrom `{}`\ + \n to `{}`", + self.debug_type(ptr.ty), + self.debug_type(dest_ty) + ), + ); + SpirvValue { + zombie_waiting_for_span: false, kind: SpirvValueKind::LogicalPtrCast { original_ptr, original_ptr_ty: ptr.ty, - bitcast_result_id: self.emit().bitcast(dest_ty, None, original_ptr).unwrap(), + bitcast_result_id, }, ty: dest_ty, } diff --git a/crates/rustc_codegen_spirv/src/builder_spirv.rs b/crates/rustc_codegen_spirv/src/builder_spirv.rs index fc735a4178..d4d7627725 100644 --- a/crates/rustc_codegen_spirv/src/builder_spirv.rs +++ b/crates/rustc_codegen_spirv/src/builder_spirv.rs @@ -70,6 +70,13 @@ pub enum SpirvValueKind { #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct SpirvValue { + // HACK(eddyb) used to cheaply check whether this is a SPIR-V value ID + // with a "zombie" (deferred error) attached to it, that may need a `Span` + // still (e.g. such as constants, which can't easily take a `Span`). + // FIXME(eddyb) a whole `bool` field is sadly inefficient, but anything + // which may make `SpirvValue` smaller requires far too much impl effort. + pub zombie_waiting_for_span: bool, + pub kind: SpirvValueKind, pub ty: Word, } @@ -103,7 +110,11 @@ impl SpirvValue { } else { SpirvValueKind::IllegalConst(pointee) }; - Some(SpirvValue { kind, ty }) + Some(SpirvValue { + zombie_waiting_for_span: entry.legal.is_err(), + kind, + ty, + }) } _ => None, } @@ -127,38 +138,7 @@ impl SpirvValue { } pub fn def_with_span(self, cx: &CodegenCx<'_>, span: Span) -> Word { - match self.kind { - SpirvValueKind::Def(id) => id, - - SpirvValueKind::IllegalConst(id) => { - let entry = &cx.builder.id_to_const.borrow()[&id]; - let msg = match entry.legal.unwrap_err() { - IllegalConst::Shallow(cause) => { - if let ( - LeafIllegalConst::CompositeContainsPtrTo, - SpirvConst::Composite(_fields), - ) = (cause, &entry.val) - { - // FIXME(eddyb) materialize this at runtime, using - // `OpCompositeConstruct` (transitively, i.e. after - // putting every field through `SpirvValue::def`), - // if we have a `Builder` to do that in. - // FIXME(eddyb) this isn't possible right now, as - // the builder would be dynamically "locked" anyway - // (i.e. attempting to do `bx.emit()` would panic). 
- } - - cause.message() - } - - IllegalConst::Indirect(cause) => cause.message(), - }; - - cx.zombie_with_span(id, span, msg); - - id - } - + let id = match self.kind { SpirvValueKind::FnAddr { .. } => { cx.builder .const_to_id @@ -171,26 +151,18 @@ impl SpirvValue { .val } - SpirvValueKind::LogicalPtrCast { + SpirvValueKind::Def(id) + | SpirvValueKind::IllegalConst(id) + | SpirvValueKind::LogicalPtrCast { original_ptr: _, - original_ptr_ty, - bitcast_result_id, - } => { - cx.zombie_with_span( - bitcast_result_id, - span, - &format!( - "cannot cast between pointer types\ - \nfrom `{}`\ - \n to `{}`", - cx.debug_type(original_ptr_ty), - cx.debug_type(self.ty) - ), - ); - - bitcast_result_id - } + original_ptr_ty: _, + bitcast_result_id: id, + } => id, + }; + if self.zombie_waiting_for_span { + cx.add_span_to_zombie_if_missing(id, span); } + id } } @@ -201,6 +173,7 @@ pub trait SpirvValueExt { impl SpirvValueExt for Word { fn with_type(self, ty: Word) -> SpirvValue { SpirvValue { + zombie_waiting_for_span: false, kind: SpirvValueKind::Def(self), ty, } @@ -606,7 +579,11 @@ impl<'tcx> BuilderSpirv<'tcx> { } else { SpirvValueKind::IllegalConst(entry.val) }; - return SpirvValue { kind, ty }; + return SpirvValue { + zombie_waiting_for_span: entry.legal.is_err(), + kind, + ty, + }; } let val = val_with_type.val; @@ -783,6 +760,17 @@ impl<'tcx> BuilderSpirv<'tcx> { LeafIllegalConst::UntypedConstDataFromAlloc, )), }; + + // FIXME(eddyb) avoid dragging "const (il)legality" around, as well + // (sadly that does require that `SpirvConst` -> SPIR-V be injective, + // e.g. `OpUndef` can never be used for unrepresentable constants). + if let Err(illegal) = legal { + let msg = match illegal { + IllegalConst::Shallow(cause) | IllegalConst::Indirect(cause) => cause.message(), + }; + cx.zombie_no_span(id, msg); + } + let val = val.tcx_arena_alloc_slices(cx); assert_matches!( self.const_to_id @@ -802,7 +790,11 @@ impl<'tcx> BuilderSpirv<'tcx> { } else { SpirvValueKind::IllegalConst(id) }; - SpirvValue { kind, ty } + SpirvValue { + zombie_waiting_for_span: legal.is_err(), + kind, + ty, + } } pub fn lookup_const_by_id(&self, id: Word) -> Option> { diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs index 1d27a5c4e9..8ee69da9a1 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs @@ -245,9 +245,9 @@ impl<'tcx> CodegenCx<'tcx> { /// is stripped from the binary. /// /// Errors will only be emitted (by `linker::zombies`) for reachable zombies. 
- pub fn zombie_with_span(&self, word: Word, span: Span, reason: &str) { + pub fn zombie_with_span(&self, id: Word, span: Span, reason: &str) { self.zombie_decorations.borrow_mut().insert( - word, + id, ( ZombieDecoration { // FIXME(eddyb) this could take advantage of `Cow` and use @@ -258,8 +258,16 @@ impl<'tcx> CodegenCx<'tcx> { ), ); } - pub fn zombie_no_span(&self, word: Word, reason: &str) { - self.zombie_with_span(word, DUMMY_SP, reason); + pub fn zombie_no_span(&self, id: Word, reason: &str) { + self.zombie_with_span(id, DUMMY_SP, reason); + } + + pub fn add_span_to_zombie_if_missing(&self, id: Word, span: Span) { + if span != DUMMY_SP + && let Some((_, src_loc @ None)) = self.zombie_decorations.borrow_mut().get_mut(&id) + { + *src_loc = SrcLocDecoration::from_rustc_span(span, &self.builder); + } } pub fn finalize_module(self) -> Module { @@ -870,6 +878,7 @@ impl<'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'tcx> { self.def_constant(ty, SpirvConst::ZombieUndefForFnAddr); SpirvValue { + zombie_waiting_for_span: false, kind: SpirvValueKind::FnAddr { function: function.id, }, diff --git a/tests/compiletests/ui/dis/ptr_copy.normal.stderr b/tests/compiletests/ui/dis/ptr_copy.normal.stderr index dd442fbb40..0be4afe69e 100644 --- a/tests/compiletests/ui/dis/ptr_copy.normal.stderr +++ b/tests/compiletests/ui/dis/ptr_copy.normal.stderr @@ -28,6 +28,12 @@ LL | pub fn main(i: f32, o: &mut f32) { error: cannot cast between pointer types from `*f32` to `*struct () { }` + --> $CORE_SRC/ptr/mod.rs:625:34 + | +LL | src: *const () = src as *const (), + | ^^^^^^^^^^^^^^^^ + | +note: used from within `core::ptr::copy::` --> $CORE_SRC/ptr/mod.rs:621:9 | LL | / ub_checks::assert_unsafe_precondition!( @@ -37,6 +43,29 @@ LL | | "ptr::copy requires that both pointer arguments are aligned a LL | | && ub_checks::maybe_is_aligned_and_not_null(dst, align, zero_size) LL | | ); | |_________^ +note: called by `ptr_copy::copy_via_raw_ptr` + --> $DIR/ptr_copy.rs:28:18 + | +LL | unsafe { core::ptr::copy(src, dst, 1) } + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +note: called by `ptr_copy::main` + --> $DIR/ptr_copy.rs:33:5 + | +LL | copy_via_raw_ptr(&i, o); + | ^^^^^^^^^^^^^^^^^^^^^^^ +note: called by `main` + --> $DIR/ptr_copy.rs:32:8 + | +LL | pub fn main(i: f32, o: &mut f32) { + | ^^^^ + +error: cannot cast between pointer types + from `*f32` + to `*struct () { }` + --> $CORE_SRC/ptr/mod.rs:626:32 + | +LL | dst: *mut () = dst as *mut (), + | ^^^^^^^^^^^^^^ | note: used from within `core::ptr::copy::` --> $CORE_SRC/ptr/mod.rs:621:9 @@ -64,5 +93,5 @@ note: called by `main` LL | pub fn main(i: f32, o: &mut f32) { | ^^^^ -error: aborting due to 2 previous errors +error: aborting due to 3 previous errors From ac5c2d8030989411c43471df8e80b440a6145ced Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 22/23] Remove `SpirvValueKind::IllegalConst`. 
--- .../src/builder/builder_methods.rs | 20 +++++++---- .../rustc_codegen_spirv/src/builder_spirv.rs | 36 +++---------------- .../src/codegen_cx/constant.rs | 5 ++- 3 files changed, 20 insertions(+), 41 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index d20cc45b5f..307b10faa9 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -2440,13 +2440,6 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { #[instrument(level = "trace", skip(self), fields(ptr, ptr_ty = ?self.debug_type(ptr.ty), dest_ty = ?self.debug_type(dest_ty)))] fn pointercast(&mut self, ptr: Self::Value, dest_ty: Self::Type) -> Self::Value { - // HACK(eddyb) reuse the special-casing in `const_bitcast`, which relies - // on adding a pointer type to an untyped pointer (to some const data). - if let SpirvValueKind::IllegalConst(_) = ptr.kind { - trace!("illegal const"); - return self.const_bitcast(ptr, dest_ty); - } - if ptr.ty == dest_ty { trace!("ptr.ty == dest_ty"); return ptr; @@ -2505,6 +2498,19 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { self.debug_type(ptr_pointee), self.debug_type(dest_pointee), ); + + // HACK(eddyb) reuse the special-casing in `const_bitcast`, which relies + // on adding a pointer type to an untyped pointer (to some const data). + if self.builder.lookup_const(ptr).is_some() { + // FIXME(eddyb) remove the condition on `zombie_waiting_for_span`, + // and constant-fold all pointer bitcasts, regardless of "legality", + // once `strip_ptrcasts` can undo `const_bitcast`, as well. + if ptr.zombie_waiting_for_span { + trace!("illegal const"); + return self.const_bitcast(ptr, dest_ty); + } + } + // Defer the cast so that it has a chance to be avoided. let original_ptr = ptr.def(self); let bitcast_result_id = self.emit().bitcast(dest_ty, None, original_ptr).unwrap(); diff --git a/crates/rustc_codegen_spirv/src/builder_spirv.rs b/crates/rustc_codegen_spirv/src/builder_spirv.rs index d4d7627725..ecb97b792e 100644 --- a/crates/rustc_codegen_spirv/src/builder_spirv.rs +++ b/crates/rustc_codegen_spirv/src/builder_spirv.rs @@ -35,11 +35,6 @@ use std::{fs::File, io::Write, path::Path}; pub enum SpirvValueKind { Def(Word), - /// The ID of a global instruction matching a `SpirvConst`, but which cannot - /// pass validation. Used to error (or attach zombie spans), at the usesites - /// of such constants, instead of where they're generated (and cached). - IllegalConst(Word), - // FIXME(eddyb) this shouldn't be needed, but `rustc_codegen_ssa` still relies // on converting `Function`s to `Value`s even for direct calls, the `Builder` // should just have direct and indirect `call` variants (or a `Callee` enum). @@ -96,7 +91,7 @@ impl SpirvValue { pub fn const_fold_load(self, cx: &CodegenCx<'_>) -> Option { match self.kind { - SpirvValueKind::Def(id) | SpirvValueKind::IllegalConst(id) => { + SpirvValueKind::Def(id) => { let &entry = cx.builder.id_to_const.borrow().get(&id)?; match entry.val { SpirvConst::PtrTo { pointee } => { @@ -104,15 +99,9 @@ impl SpirvValue { SpirvType::Pointer { pointee } => pointee, ty => bug!("load called on value that wasn't a pointer: {:?}", ty), }; - // FIXME(eddyb) deduplicate this `if`-`else` and its other copies. 
- let kind = if entry.legal.is_ok() { - SpirvValueKind::Def(pointee) - } else { - SpirvValueKind::IllegalConst(pointee) - }; Some(SpirvValue { zombie_waiting_for_span: entry.legal.is_err(), - kind, + kind: SpirvValueKind::Def(pointee), ty, }) } @@ -152,7 +141,6 @@ impl SpirvValue { } SpirvValueKind::Def(id) - | SpirvValueKind::IllegalConst(id) | SpirvValueKind::LogicalPtrCast { original_ptr: _, original_ptr_ty: _, @@ -573,15 +561,9 @@ impl<'tcx> BuilderSpirv<'tcx> { let val_with_type = WithType { ty, val }; if let Some(entry) = self.const_to_id.borrow().get(&val_with_type) { - // FIXME(eddyb) deduplicate this `if`-`else` and its other copies. - let kind = if entry.legal.is_ok() { - SpirvValueKind::Def(entry.val) - } else { - SpirvValueKind::IllegalConst(entry.val) - }; return SpirvValue { zombie_waiting_for_span: entry.legal.is_err(), - kind, + kind: SpirvValueKind::Def(entry.val), ty, }; } @@ -784,15 +766,9 @@ impl<'tcx> BuilderSpirv<'tcx> { .insert(id, WithConstLegality { val, legal }), None ); - // FIXME(eddyb) deduplicate this `if`-`else` and its other copies. - let kind = if legal.is_ok() { - SpirvValueKind::Def(id) - } else { - SpirvValueKind::IllegalConst(id) - }; SpirvValue { zombie_waiting_for_span: legal.is_err(), - kind, + kind: SpirvValueKind::Def(id), ty, } } @@ -803,9 +779,7 @@ impl<'tcx> BuilderSpirv<'tcx> { pub fn lookup_const(&self, def: SpirvValue) -> Option> { match def.kind { - SpirvValueKind::Def(id) | SpirvValueKind::IllegalConst(id) => { - self.lookup_const_by_id(id) - } + SpirvValueKind::Def(id) => self.lookup_const_by_id(id), _ => None, } } diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/constant.rs b/crates/rustc_codegen_spirv/src/codegen_cx/constant.rs index acd9b42172..1bc8164dc6 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/constant.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/constant.rs @@ -3,7 +3,7 @@ use crate::maybe_pqp_cg_ssa as rustc_codegen_ssa; use super::CodegenCx; use crate::abi::ConvSpirvType; -use crate::builder_spirv::{SpirvConst, SpirvValue, SpirvValueExt, SpirvValueKind}; +use crate::builder_spirv::{SpirvConst, SpirvValue, SpirvValueExt}; use crate::spirv_type::SpirvType; use itertools::Itertools as _; use rspirv::spirv::Word; @@ -336,8 +336,7 @@ impl<'tcx> CodegenCx<'tcx> { pub fn const_bitcast(&self, val: SpirvValue, ty: Word) -> SpirvValue { // HACK(eddyb) special-case `const_data_from_alloc` + `static_addr_of` // as the old `from_const_alloc` (now `OperandRef::from_const_alloc`). - if let SpirvValueKind::IllegalConst(_) = val.kind - && let Some(SpirvConst::PtrTo { pointee }) = self.builder.lookup_const(val) + if let Some(SpirvConst::PtrTo { pointee }) = self.builder.lookup_const(val) && let Some(SpirvConst::ConstDataFromAlloc(alloc)) = self.builder.lookup_const_by_id(pointee) && let SpirvType::Pointer { pointee } = self.lookup_type(ty) From 9f04dde33f4cae411e85bfd07cdf278700eff826 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:40:02 +0300 Subject: [PATCH 23/23] Reduce `SpirvValue` lossiness around `strip_ptrcasts` and `const_fold_load`. 
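
The main change is that `SpirvValueKind::Def` now optionally carries the
full pre-cast `SpirvValue` for deferred pointer casts, so `strip_ptrcasts`
becomes a lossless field access (preserving e.g. the zombie flag) rather
than a reconstruction from bare IDs, and `const_fold_load` can return the
cached `SpirvValue` for the pointee directly via the reshaped const maps
(`const_to_val` / `id_to_const_and_val`).

Shape sketch (an illustrative summary only; names are as in the patch, the
comments are added here for explanation):

    pub enum SpirvValueKind {
        Def {
            id: Word,
            // `Some` only when `id` is the result of a deferred pointer cast;
            // holds the entire pre-cast value, not just its ID and type.
            original_ptr_before_casts: Option<SpirvValue<Word>>,
        },
        // Still a zombie-backed placeholder for function "addresses".
        FnAddr { function: Word, zombie_id: Word },
    }
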
--- .../src/builder/builder_methods.rs | 17 +- .../src/builder/format_args_decompiler.rs | 14 +- .../rustc_codegen_spirv/src/builder_spirv.rs | 194 +++++++++--------- .../rustc_codegen_spirv/src/codegen_cx/mod.rs | 5 +- 4 files changed, 120 insertions(+), 110 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index 307b10faa9..5962adbabe 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -2512,8 +2512,8 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { } // Defer the cast so that it has a chance to be avoided. - let original_ptr = ptr.def(self); - let bitcast_result_id = self.emit().bitcast(dest_ty, None, original_ptr).unwrap(); + let ptr_id = ptr.def(self); + let bitcast_result_id = self.emit().bitcast(dest_ty, None, ptr_id).unwrap(); self.zombie( bitcast_result_id, @@ -2528,10 +2528,13 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { SpirvValue { zombie_waiting_for_span: false, - kind: SpirvValueKind::LogicalPtrCast { - original_ptr, - original_ptr_ty: ptr.ty, - bitcast_result_id, + kind: SpirvValueKind::Def { + id: bitcast_result_id, + original_ptr_before_casts: Some(SpirvValue { + zombie_waiting_for_span: ptr.zombie_waiting_for_span, + kind: ptr_id, + ty: ptr.ty, + }), }, ty: dest_ty, } @@ -3359,7 +3362,7 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { return_type, arguments, } => ( - if let SpirvValueKind::FnAddr { function } = callee.kind { + if let SpirvValueKind::FnAddr { function, .. } = callee.kind { assert_ty_eq!(self, callee_ty, pointee); function } diff --git a/crates/rustc_codegen_spirv/src/builder/format_args_decompiler.rs b/crates/rustc_codegen_spirv/src/builder/format_args_decompiler.rs index f04cf79f12..7dc8baf334 100644 --- a/crates/rustc_codegen_spirv/src/builder/format_args_decompiler.rs +++ b/crates/rustc_codegen_spirv/src/builder/format_args_decompiler.rs @@ -93,11 +93,11 @@ impl<'tcx> DecodedFormatArgs<'tcx> { // HACK(eddyb) some entry-points only take a `&str`, not `fmt::Arguments`. if let [ SpirvValue { - kind: SpirvValueKind::Def(a_id), + kind: SpirvValueKind::Def { id: a_id, .. }, .. }, SpirvValue { - kind: SpirvValueKind::Def(b_id), + kind: SpirvValueKind::Def { id: b_id, .. }, .. }, ref other_args @ .., @@ -116,14 +116,20 @@ impl<'tcx> DecodedFormatArgs<'tcx> { // HACK(eddyb) `panic_nounwind_fmt` takes an extra argument. [ SpirvValue { - kind: SpirvValueKind::Def(format_args_id), + kind: + SpirvValueKind::Def { + id: format_args_id, .. + }, .. }, _, // `&'static panic::Location<'static>` ] | [ SpirvValue { - kind: SpirvValueKind::Def(format_args_id), + kind: + SpirvValueKind::Def { + id: format_args_id, .. + }, .. 
}, _, // `force_no_backtrace: bool` diff --git a/crates/rustc_codegen_spirv/src/builder_spirv.rs b/crates/rustc_codegen_spirv/src/builder_spirv.rs index ecb97b792e..f430dfdf7f 100644 --- a/crates/rustc_codegen_spirv/src/builder_spirv.rs +++ b/crates/rustc_codegen_spirv/src/builder_spirv.rs @@ -16,7 +16,6 @@ use rustc_abi::Size; use rustc_arena::DroplessArena; use rustc_codegen_ssa::traits::ConstCodegenMethods as _; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use rustc_middle::bug; use rustc_middle::mir::interpret::ConstAllocation; use rustc_middle::ty::TyCtxt; use rustc_span::source_map::SourceMap; @@ -31,40 +30,37 @@ use std::str; use std::sync::Arc; use std::{fs::File, io::Write, path::Path}; +// HACK(eddyb) silence warnings that are inaccurate wrt future changes. +#[non_exhaustive] #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] pub enum SpirvValueKind { - Def(Word), + Def { + id: Word, + + /// If `id` is a pointer cast, this will be `Some`, and contain all the + /// information necessary to regenerate the original `SpirvValue` before + /// *any* pointer casts were applied, effectively deferring the casts + /// (as long as all downstream uses apply `.strip_ptrcasts()` first), + /// and bypassing errors they might cause (due to SPIR-V limitations). + // + // FIXME(eddyb) wouldn't it be easier to use this for *any* bitcasts? + // (with some caveats around dedicated int<->ptr casts vs bitcasts) + original_ptr_before_casts: Option>, + }, // FIXME(eddyb) this shouldn't be needed, but `rustc_codegen_ssa` still relies // on converting `Function`s to `Value`s even for direct calls, the `Builder` // should just have direct and indirect `call` variants (or a `Callee` enum). FnAddr { function: Word, - }, - - /// Deferred pointer cast, for the `Logical` addressing model (which doesn't - /// really support raw pointers in the way Rust expects to be able to use). - /// - /// The cast's target pointer type is the `ty` of the `SpirvValue` that has - /// `LogicalPtrCast` as its `kind`, as it would be redundant to have it here. - LogicalPtrCast { - /// Pointer value being cast. - original_ptr: Word, - /// Pointer type of `original_ptr`. - original_ptr_ty: Word, - - /// Result ID for the `OpBitcast` instruction representing the cast, - /// to attach zombies to. - // - // HACK(eddyb) having an `OpBitcast` only works by being DCE'd away, - // or by being replaced with a noop in `qptr::lower`. - bitcast_result_id: Word, + // FIXME(eddyb) replace this ad-hoc zombie with a proper `SpirvConst`. + zombie_id: Word, }, } #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] -pub struct SpirvValue { +pub struct SpirvValue { // HACK(eddyb) used to cheaply check whether this is a SPIR-V value ID // with a "zombie" (deferred error) attached to it, that may need a `Span` // still (e.g. such as constants, which can't easily take a `Span`). @@ -72,43 +68,48 @@ pub struct SpirvValue { // which may make `SpirvValue` smaller requires far too much impl effort. 
pub zombie_waiting_for_span: bool, - pub kind: SpirvValueKind, + pub kind: K, pub ty: Word, } +impl SpirvValue { + fn map_kind(self, f: impl FnOnce(K) -> K2) -> SpirvValue { + let SpirvValue { + zombie_waiting_for_span, + kind, + ty, + } = self; + SpirvValue { + zombie_waiting_for_span, + kind: f(kind), + ty, + } + } +} + impl SpirvValue { pub fn strip_ptrcasts(self) -> Self { match self.kind { - SpirvValueKind::LogicalPtrCast { - original_ptr, - original_ptr_ty, - bitcast_result_id: _, - } => original_ptr.with_type(original_ptr_ty), + SpirvValueKind::Def { + id: _, + original_ptr_before_casts: Some(original_ptr), + } => original_ptr.map_kind(|id| SpirvValueKind::Def { + id, + original_ptr_before_casts: None, + }), _ => self, } } pub fn const_fold_load(self, cx: &CodegenCx<'_>) -> Option { - match self.kind { - SpirvValueKind::Def(id) => { - let &entry = cx.builder.id_to_const.borrow().get(&id)?; - match entry.val { - SpirvConst::PtrTo { pointee } => { - let ty = match cx.lookup_type(self.ty) { - SpirvType::Pointer { pointee } => pointee, - ty => bug!("load called on value that wasn't a pointer: {:?}", ty), - }; - Some(SpirvValue { - zombie_waiting_for_span: entry.legal.is_err(), - kind: SpirvValueKind::Def(pointee), - ty, - }) - } - _ => None, - } + match cx.builder.lookup_const(self)? { + SpirvConst::PtrTo { pointee } => { + // HACK(eddyb) this obtains a `SpirvValue` from the ID it contains, + // so there's some conceptual inefficiency there, but it does + // prevent any of the other details from being lost accidentally. + Some(cx.builder.id_to_const_and_val.borrow().get(&pointee)?.val.1) } - _ => None, } } @@ -128,24 +129,7 @@ impl SpirvValue { pub fn def_with_span(self, cx: &CodegenCx<'_>, span: Span) -> Word { let id = match self.kind { - SpirvValueKind::FnAddr { .. } => { - cx.builder - .const_to_id - .borrow() - .get(&WithType { - ty: self.ty, - val: SpirvConst::ZombieUndefForFnAddr, - }) - .expect("FnAddr didn't go through proper undef registration") - .val - } - - SpirvValueKind::Def(id) - | SpirvValueKind::LogicalPtrCast { - original_ptr: _, - original_ptr_ty: _, - bitcast_result_id: id, - } => id, + SpirvValueKind::Def { id, .. } | SpirvValueKind::FnAddr { zombie_id: id, .. } => id, }; if self.zombie_waiting_for_span { cx.add_span_to_zombie_if_missing(id, span); @@ -162,7 +146,10 @@ impl SpirvValueExt for Word { fn with_type(self, ty: Word) -> SpirvValue { SpirvValue { zombie_waiting_for_span: false, - kind: SpirvValueKind::Def(self), + kind: SpirvValueKind::Def { + id: self, + original_ptr_before_casts: None, + }, ty, } } @@ -380,11 +367,12 @@ pub struct BuilderSpirv<'tcx> { builder: RefCell, // Bidirectional maps between `SpirvConst` and the ID of the defined global - // (e.g. `OpConstant...`) instruction. - // NOTE(eddyb) both maps have `WithConstLegality` around their keys, which - // allows getting that legality information without additional lookups. - const_to_id: RefCell>, WithConstLegality>>, - id_to_const: RefCell>>>, + // (e.g. `OpConstant...`) instruction, with additional information in values + // (i.e. each map is keyed by only some part of the other map's value type), + // as needed to streamline operations (e.g. avoiding rederiving `SpirvValue`). 
+ const_to_val: RefCell>, SpirvValue>>, + id_to_const_and_val: + RefCell, SpirvValue)>>>, debug_file_cache: RefCell>>, @@ -455,8 +443,8 @@ impl<'tcx> BuilderSpirv<'tcx> { source_map: tcx.sess.source_map(), dropless_arena: &tcx.arena.dropless, builder: RefCell::new(builder), - const_to_id: Default::default(), - id_to_const: Default::default(), + const_to_val: Default::default(), + id_to_const_and_val: Default::default(), debug_file_cache: Default::default(), enabled_capabilities, } @@ -560,12 +548,8 @@ impl<'tcx> BuilderSpirv<'tcx> { }; let val_with_type = WithType { ty, val }; - if let Some(entry) = self.const_to_id.borrow().get(&val_with_type) { - return SpirvValue { - zombie_waiting_for_span: entry.legal.is_err(), - kind: SpirvValueKind::Def(entry.val), - ty, - }; + if let Some(&v) = self.const_to_val.borrow().get(&val_with_type) { + return v; } let val = val_with_type.val; @@ -697,11 +681,11 @@ impl<'tcx> BuilderSpirv<'tcx> { SpirvConst::Composite(v) => v .iter() .map(|field| { - let field_entry = &self.id_to_const.borrow()[field]; + let field_entry = &self.id_to_const_and_val.borrow()[field]; field_entry.legal.and( // `field` is itself some legal `SpirvConst`, but can we have // it as part of an `OpConstantComposite`? - match field_entry.val { + match field_entry.val.0 { SpirvConst::PtrTo { .. } => Err(IllegalConst::Shallow( LeafIllegalConst::CompositeContainsPtrTo, )), @@ -729,14 +713,16 @@ impl<'tcx> BuilderSpirv<'tcx> { }) .unwrap_or(Ok(())), - SpirvConst::PtrTo { pointee } => match self.id_to_const.borrow()[&pointee].legal { - Ok(()) => Ok(()), + SpirvConst::PtrTo { pointee } => { + match self.id_to_const_and_val.borrow()[&pointee].legal { + Ok(()) => Ok(()), - // `Shallow` becomes `Indirect` when placed behind a pointer. - Err(IllegalConst::Shallow(cause) | IllegalConst::Indirect(cause)) => { - Err(IllegalConst::Indirect(cause)) + // `Shallow` becomes `Indirect` when placed behind a pointer. + Err(IllegalConst::Shallow(cause) | IllegalConst::Indirect(cause)) => { + Err(IllegalConst::Indirect(cause)) + } } - }, + } SpirvConst::ConstDataFromAlloc(_) => Err(IllegalConst::Shallow( LeafIllegalConst::UntypedConstDataFromAlloc, @@ -754,32 +740,44 @@ impl<'tcx> BuilderSpirv<'tcx> { } let val = val.tcx_arena_alloc_slices(cx); + + // FIXME(eddyb) the `val`/`v` name clash is a bit unfortunate. + let v = SpirvValue { + zombie_waiting_for_span: legal.is_err(), + kind: SpirvValueKind::Def { + id, + original_ptr_before_casts: None, + }, + ty, + }; + assert_matches!( - self.const_to_id + self.const_to_val .borrow_mut() - .insert(WithType { ty, val }, WithConstLegality { val: id, legal }), + .insert(WithType { ty, val }, v), None ); assert_matches!( - self.id_to_const - .borrow_mut() - .insert(id, WithConstLegality { val, legal }), + self.id_to_const_and_val.borrow_mut().insert( + id, + WithConstLegality { + val: (val, v), + legal + } + ), None ); - SpirvValue { - zombie_waiting_for_span: legal.is_err(), - kind: SpirvValueKind::Def(id), - ty, - } + + v } pub fn lookup_const_by_id(&self, id: Word) -> Option> { - Some(self.id_to_const.borrow().get(&id)?.val) + Some(self.id_to_const_and_val.borrow().get(&id)?.val.0) } pub fn lookup_const(&self, def: SpirvValue) -> Option> { match def.kind { - SpirvValueKind::Def(id) => self.lookup_const_by_id(id), + SpirvValueKind::Def { id, .. 
} => self.lookup_const_by_id(id), _ => None, } } diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs index 8ee69da9a1..f36d8a624c 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs @@ -875,12 +875,15 @@ impl<'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'tcx> { // Create these `OpUndef`s up front, instead of on-demand in `SpirvValue::def`, // because `SpirvValue::def` can't use `cx.emit()`. - self.def_constant(ty, SpirvConst::ZombieUndefForFnAddr); + let zombie_id = self + .def_constant(ty, SpirvConst::ZombieUndefForFnAddr) + .def_with_span(self, span); SpirvValue { zombie_waiting_for_span: false, kind: SpirvValueKind::FnAddr { function: function.id, + zombie_id, }, ty, }
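
Note on the representation change above: `SpirvValueKind::LogicalPtrCast` is folded into
`SpirvValueKind::Def { id, original_ptr_before_casts }`, so a pointer cast is still a plain
`Def` (the emitted `OpBitcast` result), with enough information attached to undo the cast
on demand via `.strip_ptrcasts()`. The following is a minimal, self-contained sketch of that
idea, using simplified stand-in types (not the real `rustc_codegen_spirv` definitions; in
particular `original_ptr_before_casts` is reduced here to an `(id, ty)` pair instead of a
nested `SpirvValue`), intended only to illustrate the mechanism:

type Word = u32;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpirvValueKind {
    Def {
        id: Word,
        /// `Some` iff `id` is the result of a pointer cast; holds the
        /// `(id, ty)` of the pointer before any casts were applied.
        original_ptr_before_casts: Option<(Word, Word)>,
    },
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct SpirvValue {
    kind: SpirvValueKind,
    ty: Word,
}

impl SpirvValue {
    fn with_type(id: Word, ty: Word) -> Self {
        SpirvValue {
            kind: SpirvValueKind::Def { id, original_ptr_before_casts: None },
            ty,
        }
    }

    /// Undo any deferred pointer casts, recovering the original pointer value.
    fn strip_ptrcasts(self) -> Self {
        match self.kind {
            SpirvValueKind::Def { original_ptr_before_casts: Some((id, ty)), .. } => {
                Self::with_type(id, ty)
            }
            _ => self,
        }
    }
}

fn main() {
    // %1 : pointer of type %10, later bitcast to %2 : pointer of type %20.
    let original = SpirvValue::with_type(1, 10);
    let casted = SpirvValue {
        kind: SpirvValueKind::Def { id: 2, original_ptr_before_casts: Some((1, 10)) },
        ty: 20,
    };
    // Downstream users that can't tolerate the cast recover the original pointer.
    assert_eq!(casted.strip_ptrcasts(), original);
    // Values that were never cast are returned unchanged.
    assert_eq!(original.strip_ptrcasts(), original);
}

The same motivation drives the `const_to_val`/`id_to_const_and_val` rework: by storing the
full `SpirvValue` in the maps (rather than only an ID plus legality), lookups such as
`const_fold_load` and `def_constant` can return a cached value directly instead of
rederiving the zombie/legality details each time.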