diff --git a/src/analysis/pe.rs b/src/analysis/pe.rs index 65671d0..e7f2305 100644 --- a/src/analysis/pe.rs +++ b/src/analysis/pe.rs @@ -116,7 +116,16 @@ fn detect_imports(obj: &mut ObjInfo, pe: &PeFile32, _data: &[u8]) -> Result<()> Ok((_hint, name)) => { let n = std::str::from_utf8(name).unwrap_or("unknown").trim_end_matches('\0'); - format!("__imp__{}", n) + // MSVC IAT name convention: '__imp_' + decorated name. + // Cdecl/stdcall C names already carry a leading '_' (so + // the result is '__imp__Foo'); C++ mangled ('?...') and + // fastcall ('@...') names do not, yielding '__imp_?foo' + // or '__imp_@foo@8' with a single underscore. + if n.starts_with('?') || n.starts_with('@') { + format!("__imp_{}", n) + } else { + format!("__imp__{}", n) + } } Err(_) => { iat_off += 4; @@ -177,12 +186,18 @@ fn detect_imports(obj: &mut ObjInfo, pe: &PeFile32, _data: &[u8]) -> Result<()> i += 1; continue; } - // Derive thunk name: "__imp__SetWindowPos" → "_SetWindowPos" - // (strip double underscore, keep single) - let preferred = imp_name - .strip_prefix("__imp__") - .map(|n| format!("_{}", n)) - .unwrap_or_else(|| imp_name.to_string()); + // Derive thunk name from IAT symbol: + // "__imp__SetWindowPos" → "_SetWindowPos" (cdecl: drop one '_') + // "__imp_?foo@@YAXXZ" → "?foo@@YAXXZ" (MSVC C++: drop '__imp_') + // "__imp_@foo@8" → "@foo@8" (fastcall: drop '__imp_') + let preferred = if let Some(rest) = imp_name.strip_prefix("__imp_") { + // `rest` is the decorated thunk name as MSVC writes it: + // for cdecl it still starts with '_' (the original C prefix); + // for '?'/'@' it is the bare mangled name. + rest.to_string() + } else { + imp_name.to_string() + }; // If the preferred name is already taken (e.g. two thunks for the same // import), fall back to a generated name so there's no duplicate symbol. 
let thunk_name = if used_names.contains(&preferred) { diff --git a/src/cmd/rel.rs b/src/cmd/rel.rs index 1790276..cdc52f5 100644 --- a/src/cmd/rel.rs +++ b/src/cmd/rel.rs @@ -532,6 +532,7 @@ fn merge(args: MergeArgs) -> Result<()> { file_offset: mod_section.file_offset, section_known: mod_section.section_known, splits: mod_section.splits.clone(), + sub_regions: mod_section.sub_regions.clone(), }); section_map.nested_insert(module.module_id, mod_section.elf_index, offset)?; for (_, mod_symbol) in module.symbols.for_section(mod_section_index) { diff --git a/src/obj/mod.rs b/src/obj/mod.rs index 5242d32..e57787d 100644 --- a/src/obj/mod.rs +++ b/src/obj/mod.rs @@ -14,7 +14,7 @@ use anyhow::{Result, anyhow, bail, ensure}; use objdiff_core::obj::split_meta::SplitMeta; pub use relocations::{ObjReloc, ObjRelocKind, ObjRelocations}; pub use sections::{ - ObjSection, ObjSectionKind, ObjSections, SectionIndex, section_kind_for_section, + ObjSection, ObjSectionKind, ObjSections, ObjSubRegion, SectionIndex, section_kind_for_section, }; pub use splits::{ObjSplit, ObjSplits}; pub use symbols::{ diff --git a/src/obj/sections.rs b/src/obj/sections.rs index ca5fa59..6d90d29 100644 --- a/src/obj/sections.rs +++ b/src/obj/sections.rs @@ -20,6 +20,17 @@ pub enum ObjSectionKind { Bss, } +/// A logical sub-section that exists within a physical section's VA range. +/// Used to model MSVC COFF grouped sub-sections like `.rdata$r` / `.xdata$x` +/// which the linker merges into a single PE section (`.rdata`) but which +/// .obj files still emit under their distinct names. 
+#[derive(Debug, Clone)] +pub struct ObjSubRegion { + pub start: u32, + pub end: u32, + pub name: String, +} + #[derive(Debug, Clone)] pub struct ObjSection { pub name: String, @@ -35,6 +46,7 @@ pub struct ObjSection { pub file_offset: u64, pub section_known: bool, pub splits: ObjSplits, + pub sub_regions: Vec<ObjSubRegion>, } #[derive(Debug, Clone)] diff --git a/src/util/coff.rs b/src/util/coff.rs index 6c0f82f..7edaf30 100644 --- a/src/util/coff.rs +++ b/src/util/coff.rs @@ -8,7 +8,7 @@ use object::{ RelocationEncoding, RelocationKind, RelocationTarget, SectionKind, SymbolFlags, SymbolKind, SymbolScope, write::{ - Object as WriteObject, Relocation, RelocationFlags, SectionId, Symbol, SymbolId, + Mangling, Object as WriteObject, Relocation, RelocationFlags, SectionId, Symbol, SymbolId, SymbolSection as WriteSymbolSection, }, }; @@ -78,6 +78,7 @@ pub fn process_coff(data: &[u8], name: &str) -> Result<(ObjInfo, Option)> { file_offset: section.file_range().map(|(v, _)| v).unwrap_or_default(), section_known: true, splits: Default::default(), + sub_regions: Vec::new(), }); } @@ -208,6 +209,13 @@ pub fn process_coff(data: &[u8], name: &str) -> Result<(ObjInfo, Option)> { pub fn write_coff(obj: &ObjInfo, export_all: bool) -> Result> { let mut out = WriteObject::new(BinaryFormat::Coff, Architecture::I386, Endianness::Little); + // Disable object crate's auto leading-underscore mangling: it blindly + // prepends '_' to every Text/Data symbol, which corrupts MSVC C++ + // mangled names ('?...') and fastcall names ('@...'). dtk's stored + // symbol names already follow the MSVC convention literally (cdecl C + // names include their leading '_' in symbols.txt; mangled names do not), + // so we write them verbatim. 
+ out.set_mangling(Mangling::None); // Add sections and build section id map (indexed by ObjSectionIndex) let mut section_ids: Vec> = vec![None; obj.sections.len() as usize]; diff --git a/src/util/config.rs b/src/util/config.rs index 7d58141..4fed265 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -482,6 +482,18 @@ where write!(w, " vaddr:{:#010X}", vaddr)?; } writeln!(w)?; + // Round-trip logical sub-section declarations (e.g. .rdata$r) so they + // survive `dtk coff split` regenerating splits.txt. + for region in &section.sub_regions { + writeln!( + w, + "\t{:<11} type:{} vaddr:{:#010X} end:{:#010X}", + region.name, + section_kind_to_str(section.kind), + region.start, + region.end, + )?; + } } for unit in obj.link_order.iter().filter(|unit| all || !unit.autogenerated) { write!(w, "\n{}:", unit.name)?; @@ -555,6 +567,10 @@ pub struct SectionDef { pub kind: Option, pub align: Option, pub vaddr: Option, + /// Optional end VA. Combined with `vaddr`, declares this entry as a + /// logical sub-section nested inside an existing physical section (e.g. + /// MSVC COFF `.rdata$r` mapped onto a PE `.rdata`). + pub end: Option<u32>, } enum SplitLine { @@ -610,7 +626,7 @@ fn parse_section_line(captures: Captures, state: &SplitState) -> Result Result { section.vaddr = Some(parse_u32(value)?); } + "end" => { + section.end = Some(parse_u32(value)?); + } _ => bail!("Unknown section attribute '{attr}'"), } } else { @@ -726,8 +745,24 @@ where } ( SplitState::Sections(index), - SplitLine::Section(SectionDef { name, kind, align, vaddr }), + SplitLine::Section(SectionDef { name, kind, align, vaddr, end }), ) => { + // Logical sub-section: declared with `vaddr:X end:Y`. Mapped + // onto whichever physical section contains [X, Y) — no new + // physical section is consumed. 
+ if obj.kind == ObjKind::Executable + && let (Some(start), Some(stop)) = (vaddr, end) + { + ensure!(stop > start, "Section '{name}' end must exceed vaddr"); + let (parent_idx, _) = obj.sections.with_range(start..stop)?; + let parent = obj.sections.get_mut(parent_idx).unwrap(); + parent.sub_regions.push(crate::obj::ObjSubRegion { + start, + end: stop, + name: name.clone(), + }); + continue; + } let Some(obj_section) = obj.sections.get_mut(*index) else { bail!( "Section out of bounds: {} (index {}), object has {} sections", diff --git a/src/util/dol.rs b/src/util/dol.rs index d053884..bb116cf 100644 --- a/src/util/dol.rs +++ b/src/util/dol.rs @@ -383,6 +383,7 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result { file_offset: file_offset as u64, section_known: known, splits: Default::default(), + sub_regions: Vec::new(), }); } } else { @@ -436,6 +437,7 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result { file_offset: dol_section.file_offset as u64, section_known: known, splits: Default::default(), + sub_regions: Vec::new(), }); } } @@ -468,6 +470,7 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result { file_offset: 0, section_known: false, splits: Default::default(), + sub_regions: Vec::new(), }); } @@ -488,6 +491,7 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result { file_offset: 0, section_known: false, splits: Default::default(), + sub_regions: Vec::new(), }); let mut obj = ObjInfo::new( ObjKind::Executable, @@ -515,6 +519,7 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result { file_offset: 0, section_known: false, splits: Default::default(), + sub_regions: Vec::new(), }); sections.push(ObjSection { name: ".sbss".to_string(), @@ -529,6 +534,7 @@ pub fn process_dol(buf: &[u8], name: &str) -> Result { file_offset: 0, section_known: false, splits: Default::default(), + sub_regions: Vec::new(), }); } n => bail!("Invalid number of BSS sections: {}", n), diff --git a/src/util/elf.rs b/src/util/elf.rs index e8dd0d3..0be4668 100644 --- 
a/src/util/elf.rs +++ b/src/util/elf.rs @@ -103,6 +103,7 @@ pub fn process_elf(path: &Utf8NativePath) -> Result { file_offset: section.file_range().map(|(v, _)| v).unwrap_or_default(), section_known: true, splits: Default::default(), + sub_regions: Vec::new(), }); } diff --git a/src/util/map.rs b/src/util/map.rs index 428f9bd..2dceae1 100644 --- a/src/util/map.rs +++ b/src/util/map.rs @@ -1158,6 +1158,7 @@ pub fn create_obj(result: &MapInfo) -> Result { file_offset, section_known: true, splits: Default::default(), + sub_regions: Vec::new(), } }) .collect(); diff --git a/src/util/rel.rs b/src/util/rel.rs index 99390a5..665d789 100644 --- a/src/util/rel.rs +++ b/src/util/rel.rs @@ -452,6 +452,7 @@ where file_offset: offset as u64, section_known, splits: Default::default(), + sub_regions: Vec::new(), }); } ensure!( diff --git a/src/util/rso.rs b/src/util/rso.rs index 32ea771..63761b6 100644 --- a/src/util/rso.rs +++ b/src/util/rso.rs @@ -500,6 +500,7 @@ where file_offset: offset as u64, section_known: false, splits: Default::default(), + sub_regions: Vec::new(), }); if offset == 0 { total_bss_size += size; diff --git a/src/util/rsp.rs b/src/util/rsp.rs index 1ac9f16..091d817 100644 --- a/src/util/rsp.rs +++ b/src/util/rsp.rs @@ -166,7 +166,9 @@ pub fn generate_args_rsp( lines.push(format!("/BASE:{:#x}", pe.image_base)); - // Resolve entry symbol name from the entry VA + // Resolve entry symbol name from the entry VA. + // lld-link's /ENTRY auto-prepends '_' for i386 PE, so strip the leading + // underscore from cdecl C names; mangled names ('?', '@') are passed as-is. 
if let Some(entry_sym) = obj.entry.and_then(|e| { let (sec_idx, _) = obj.sections.at_address(e as u32).ok()?; obj.symbols @@ -174,7 +176,8 @@ pub fn generate_args_rsp( .find(|(_, s)| s.kind == crate::obj::ObjSymbolKind::Function) .map(|(_, s)| s.name.clone()) }) { - lines.push(format!("/ENTRY:{entry_sym}")); + let entry_arg = entry_sym.strip_prefix('_').unwrap_or(&entry_sym); + lines.push(format!("/ENTRY:{entry_arg}")); } lines.push(format!("/SUBSYSTEM:{},{}", pe.subsystem_name(), pe.major_subsystem_version,)); diff --git a/src/util/split.rs b/src/util/split.rs index 2ce4a5e..6a54aac 100644 --- a/src/util/split.rs +++ b/src/util/split.rs @@ -716,11 +716,28 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { current_address, split_align ); - // Find any duplicate symbols in this range + // Break auto-fill at sub-region boundaries so the .rdata$r + // and .xdata$x slices each get their own auto unit (and the + // emitted COFF section name matches the region they fall in). let mut new_split_end = split_start; + for r in &section.sub_regions { + if current_address.address < r.start && new_split_end.address > r.start { + new_split_end.address = r.start; + } + if current_address.address >= r.start + && current_address.address < r.end + && new_split_end.address > r.end + { + new_split_end.address = r.end; + } + } + // Find any duplicate symbols in this range let symbols = obj .symbols - .for_section_range(section_index, current_address.address..split_start.address) + .for_section_range( + section_index, + current_address.address..new_split_end.address, + ) .filter(|&(_, s)| !s.flags.is_stripped()) .collect_vec(); let mut existing_symbols = HashSet::new(); @@ -772,7 +789,16 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { current_address, new_split_end ); - let effective_section = prev_rename.as_deref().unwrap_or(&section.name); + // Prefer an explicit sub-region declaration (e.g. 
`.rdata$r` + // declared in splits.txt Sections:) over the carry-forward + // rename from the last consumed split. + let region_rename = section.sub_regions.iter().find_map(|r| { + (current_address.address >= r.start && current_address.address < r.end) + .then(|| r.name.clone()) + }); + let effective_rename = region_rename.or_else(|| prev_rename.clone()); + let effective_section = + effective_rename.as_deref().unwrap_or(&section.name); let unit = format!( "auto_{:02}_{:08X}_{}", current_address.section, @@ -788,7 +814,7 @@ fn create_gap_splits(obj: &mut ObjInfo) -> Result<()> { common: false, autogenerated: true, skip: false, - rename: prev_rename.clone(), + rename: effective_rename, }, ); current_address = new_split_end; @@ -1612,6 +1638,7 @@ pub fn split_obj( + (current_address.address as u64 - section.address), section_known: true, splits: Default::default(), + sub_regions: Vec::new(), }); }