From 6a71b1e1065b9078a8f8d95d25c89bbea42c97e4 Mon Sep 17 00:00:00 2001 From: Frank Pound Date: Thu, 14 May 2026 19:30:42 -0400 Subject: [PATCH] fix: support multi-segment ELF layouts in DT_STRTAB resolution PROBLEM ------- build_dynsym_data() crashes with SIGSEGV (EXC_BAD_ACCESS) on ELF shared objects that use 3+ PT_LOAD segments where .dynstr resides in a separate RW segment rather than the text segment. Reproduced against PySide2/Shiboken2 .so files from Binary Ninja, but affects any ELF linked with a layout that places .dynstr outside the text segment. ROOT CAUSE ---------- load_dynamic_segment_data() resolves DT_STRTAB using: obj->dynstr = &obj->mem[entry.value - elf_text_base(obj)]; This assumes DT_STRTAB always lives in the text segment. For 3-segment layouts the VMA-to-file-offset mapping differs per segment: LOAD #1: vaddr=0x000000 offset=0x000000 (text, r-x) LOAD #2: vaddr=0x240a88 offset=0x040a88 (data, rw-) LOAD #3: vaddr=0x243000 offset=0x054000 (dynstr, rw-) DT_STRTAB=0x2433c0 yields offset 0x2433c0 (past 355KB file end) instead of correct offset 0x543c0. Additionally, build_dynsym_data() has no bounds check on st_name before indexing into dynstr, unlike build_symtab_data() which does. FIX --- 1. Use elf_address_pointer() to resolve DT_STRTAB virtual address to file offset via PT_LOAD segment walk. This handles any number of LOAD segments correctly. Legacy text-base math kept as fallback for edge cases. 2. Add st_name < dynseg.dynstr.size bounds check in build_dynsym_data() for both 32-bit and 64-bit paths, matching the existing pattern in build_symtab_data(). Out-of-bounds indices resolve to "invalid_name_index" instead of crashing. If obj->dynstr is NULL or dynseg.dynstr.size is 0, build_dynsym_data() now returns true early, leaving the dynsym list empty. 3. Relax DT_SYMTAB and DT_STRTAB anomaly checks to accept addresses in any LOAD segment (via elf_address_pointer != NULL) rather than only the text segment. NOTE: The existing sanity_check_st_name() stub (marked XXX TODO) remains a no-op; the bounds check in build_dynsym_data effectively covers the dynstr case. 
TESTING ------- Verified against the crashing binary (libshiboken2.abi3.so.5.14, 497 dynamic symbols) -- all symbols now resolve correctly. --- src/internal.c | 69 ++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/internal.c b/src/internal.c index b99c696..c1ce234 100644 --- a/src/internal.c +++ b/src/internal.c @@ -325,9 +325,13 @@ build_dynsym_data(struct elfobj *obj) Elf32_Sym *dsym32; Elf64_Sym *dsym64; struct elf_dynsym_list *list = &obj->list.dynsym; + size_t dynstr_sz = obj->dynseg.dynstr.size; LIST_INIT(&obj->list.dynsym); + if (obj->dynstr == NULL || dynstr_sz == 0) + return true; /* no dynstr to resolve names from */ + for (i = 0; i < obj->dynsym_count; i++) { struct elf_symbol_node *symbol = malloc(sizeof(*symbol)); @@ -337,7 +341,11 @@ build_dynsym_data(struct elfobj *obj) switch(obj->e_class) { case elfclass32: dsym32 = obj->dynsym32; - symbol->name = &obj->dynstr[dsym32[i].st_name]; + if (dsym32[i].st_name < dynstr_sz) { + symbol->name = &obj->dynstr[dsym32[i].st_name]; + } else { + symbol->name = "invalid_name_index"; + } symbol->value = dsym32[i].st_value; symbol->shndx = dsym32[i].st_shndx; symbol->size = dsym32[i].st_size; @@ -347,7 +355,11 @@ build_dynsym_data(struct elfobj *obj) break; case elfclass64: dsym64 = obj->dynsym64; - symbol->name = &obj->dynstr[dsym64[i].st_name]; + if (dsym64[i].st_name < dynstr_sz) { + symbol->name = &obj->dynstr[dsym64[i].st_name]; + } else { + symbol->name = "invalid_name_index"; + } symbol->value = dsym64[i].st_value; symbol->shndx = dsym64[i].st_shndx; symbol->size = dsym64[i].st_size; @@ -853,31 +865,22 @@ load_dynamic_segment_data(struct elfobj *obj) if (entry.tag != DT_STRTAB) continue; /* - * we must handle anomalies where .dynstr - * is not stored in the text segment. I've seen this before - * with strange linker script configs where .dynstr is writable - * and in the data segment. 
For now return false if .dynstr is - * not in the text segment and we are performing forensics - * reconstruction. We must also adjust elf_data_base and elf_text_base - * to account for SCOP binaries. + * Resolve DT_STRTAB (a virtual address) to a file offset. + * Use elf_address_pointer() which properly handles multi- + * segment layouts (e.g. 3 LOAD segments where .dynstr lives + * in a separate RW segment rather than the text segment). + * Falls back to the legacy text-base subtraction if the + * segment walk fails. */ -#if 0 - if (entry.value >= - elf_text_base(obj) + elf_text_filesz(obj)) { - if (entry.value >= elf_data_base(obj) && - entry.value < elf_data_base(obj) + elf_data_filesz(obj)) { - obj->dynstr = (char *)&obj->mem[entry.value - - elf_data_base(obj)]; - if (obj->dynstr == NULL) - return false; - } else { - fprintf(stderr, - ".dynstr points outside of text and data segment\n"); - return false; + obj->dynstr = (char *)elf_address_pointer(obj, entry.value); + if (obj->dynstr == NULL) { + /* Fallback: legacy text-base calculation */ + if (entry.value >= elf_text_base(obj)) { + size_t off = entry.value - elf_text_base(obj); + if (off < obj->size) + obj->dynstr = (char *)&obj->mem[off]; } } -#endif - obj->dynstr = (char *)&obj->mem[entry.value - elf_text_base(obj)]; if (obj->dynstr == NULL) return false; } @@ -944,9 +947,12 @@ load_dynamic_segment_data(struct elfobj *obj) case DT_SYMTAB: if (dt_symtab++ > 0) break; - if (entry.value < elf_text_base(obj) || - entry.value > elf_text_base(obj) + - elf_text_filesz(obj) - ptr_width - 1) { + /* + * DT_SYMTAB can reside in any LOAD segment, not just + * the text segment (e.g. 3-segment layouts). Only flag + * as anomalous if it can't be resolved at all. 
+ */ + if (elf_address_pointer(obj, entry.value) == NULL) { obj->anomalies |= INVALID_F_VITAL_DTAG_VALUE; } obj->dynseg.dynsym.addr = entry.value; @@ -954,9 +960,12 @@ load_dynamic_segment_data(struct elfobj *obj) case DT_STRTAB: if (dt_strtab++ > 0) break; - if (entry.value < elf_text_base(obj) || - entry.value > elf_text_base(obj) + - elf_text_filesz(obj) - 1) { + /* + * DT_STRTAB can reside in any LOAD segment (e.g. + * writable .dynstr in a separate RW LOAD segment). + * Only flag as anomalous if unreachable. + */ + if (elf_address_pointer(obj, entry.value) == NULL) { obj->anomalies |= INVALID_F_VITAL_DTAG_VALUE; } obj->dynseg.dynstr.addr = entry.value;