diff --git a/Cargo.toml b/Cargo.toml index 11307468..3fae47eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,13 +15,14 @@ libc = "0.2.14" optional = true version = "1.0.0" -[dev-dependencies] +[target.'cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))'.dev-dependencies] simd = "0.1" [features] -default = ["alloc", "valgrind"] +default = ["alloc", "valgrind", "unwind"] alloc = [] valgrind = ["valgrind_request"] +unwind = [] # These apply only to tests within this library; assembly at -O0 is completely # unreadable, so use -O1. diff --git a/README.md b/README.md index feec836a..9aaf20f8 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ there should be at least 8 KiB of free stack space, or panicking will result in ## Limitations -The architectures currently supported are: x86, x86_64, aarch64, or1k. +The architectures currently supported are: x86, x86_64, aarch64, arm, or1k. The platforms currently supported are: bare metal, Linux (any libc), FreeBSD, DragonFly BSD, macOS. @@ -175,13 +175,15 @@ of callee-saved registers. ### Call stack splicing -Non-Windows platforms use [DWARF][] for both stack unwinding and debugging. DWARF call frame -information is very generic to be ABI-agnostic—it defines a bytecode that describes the actions -that need to be performed to simulate returning from a function. libfringe uses this bytecode -to specify that, after the generator function has returned, execution continues at the point -where the generator function was resumed the last time. +Non-Windows platforms use [DWARF][] (or the highly similar [ARM EHABI][ehabi]) for both stack +unwinding and debugging. DWARF call frame information is very generic to be ABI-agnostic— +it defines a bytecode that describes the actions that need to be performed to simulate +returning from a function. libfringe uses this bytecode to specify that, after the generator +function has returned, execution continues at the point where the generator function was +resumed the last time. [dwarf]: http://dwarfstd.org +[ehabi]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038b/IHI0038B_ehabi.pdf ## Windows compatibility diff --git a/src/arch/aarch64.rs b/src/arch/aarch64.rs index 58a4a643..d2c623ac 100644 --- a/src/arch/aarch64.rs +++ b/src/arch/aarch64.rs @@ -47,14 +47,12 @@ // from the stack frame at x29 (in the parent stack), thus continuing // unwinding at the swap call site instead of falling off the end of context stack. use core::mem; -use stack::Stack; +use arch::StackPointer; +use unwind; pub const STACK_ALIGNMENT: usize = 16; -#[derive(Debug, Clone, Copy)] -pub struct StackPointer(*mut usize); - -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { +pub unsafe fn init(stack_base: *mut u8, f: unsafe fn(usize, StackPointer)) -> StackPointer { #[cfg(not(target_vendor = "apple"))] #[naked] unsafe extern "C" fn trampoline_1() { @@ -126,16 +124,38 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - # trampoline_2. nop - # Call the provided function. - ldr x2, [sp, #16] - blr x2 - "# - : : : : "volatile") - } + # Call unwind_wrapper with the provided function and the stack base address. + add x2, sp, #32 + ldr x3, [sp, #16] + bl ${0} + + # Restore the stack pointer of the parent context. No CFI adjustments + # are needed since we have the same stack frame as trampoline_1. + ldr x2, [sp] + mov sp, x2 + + # Load frame and instruction pointers of the parent context. 
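+        # (The post-indexed `ldp x29, x30, [sp], #16` below restores the saved
+        # frame pointer and return address and pops them in a single
+        # instruction; the .cfi_* directives keep the unwinder's view of those
+        # registers in sync.)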
+ ldp x29, x30, [sp], #16 + .cfi_adjust_cfa_offset -16 + .cfi_restore x29 + .cfi_restore x30 - unsafe fn push(sp: &mut StackPointer, val: usize) { - sp.0 = sp.0.offset(-1); - *sp.0 = val + # If the returned value is nonzero, trigger an unwind in the parent + # context with the given exception object. + cbnz x0, ${1} + + # Clear the stack pointer. We can't call into this context any more once + # the function has returned. + mov x1, #0 + + # Return into the parent context. Use `br` instead of a `ret` to avoid + # return address mispredictions. + br x30 + "# + : + : "s" (unwind::unwind_wrapper as usize) + "s" (unwind::start_unwind as usize) + : : "volatile") } // We set up the stack in a somewhat special way so that to the unwinder it @@ -146,43 +166,34 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - // followed by the x29 value for that frame. This setup supports unwinding // using DWARF CFI as well as the frame pointer-based unwinding used by tools // such as perf or dtrace. - let mut sp = StackPointer(stack.base() as *mut usize); + let mut sp = StackPointer::new(stack_base); - push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned - push(&mut sp, f as usize); // Function that trampoline_2 should call + sp.push(0 as usize); // Padding to ensure the stack is properly aligned + sp.push(f as usize); // Function that trampoline_2 should call // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. - push(&mut sp, trampoline_1 as usize + 4); // Return after the nop - push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot + sp.push(trampoline_1 as usize + 4); // Return after the nop + sp.push(0xdeaddeaddead0cfa); // CFA slot // Call frame for swap::trampoline. We set up the x29 value to point to the // parent call frame. - let frame = sp; - push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop - push(&mut sp, frame.0 as usize); // Pointer to parent call frame + let frame = sp.offset(0); + sp.push(trampoline_2 as usize + 4); // Entry point, skip initial nop + sp.push(frame as usize); // Pointer to parent call frame sp } #[inline(always)] -pub unsafe fn swap(arg: usize, new_sp: StackPointer, - new_stack: Option<&Stack>) -> (usize, StackPointer) { - // Address of the topmost CFA stack slot. - let mut dummy: usize = mem::uninitialized(); - let new_cfa = if let Some(new_stack) = new_stack { - (new_stack.base() as *mut usize).offset(-4) - } else { - // Just pass a dummy pointer if we aren't linking the stack - &mut dummy - }; - +pub unsafe fn swap_link(arg: usize, new_sp: StackPointer, + new_stack_base: *mut u8) -> (usize, Option) { let ret: usize; - let ret_sp: *mut usize; + let ret_sp: usize; asm!( r#" # Set up the link register - adr lr, 0f + adr x30, 0f # Save the frame pointer and link register; the unwinder uses them to find # the CFA of the caller, and so they have to have the correct value immediately @@ -194,7 +205,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, # Link the call stacks together by writing the current stack bottom # address to the CFA slot in the new stack. - str x1, [x3] + str x1, [x3, #-32] # Load stack pointer of the new context. 
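        # (After the mov below we are executing on the new stack; a backtrace
        # taken in the new context can still walk back into the old one through
        # the stack bottom address written to the CFA slot above.)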
mov sp, x2 @@ -211,9 +222,9 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, : "={x0}" (ret) "={x1}" (ret_sp) : "{x0}" (arg) - "{x2}" (new_sp.0) - "{x3}" (new_cfa) - :/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7", + "{x2}" (new_sp.offset(0)) + "{x3}" (new_stack_base) + :/*"x0", "x1",*/"x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/ @@ -228,5 +239,76 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, // the "alignstack" LLVM inline assembly option does exactly the same // thing on AArch64. : "volatile", "alignstack"); - (ret, StackPointer(ret_sp)) + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) { + // This is identical to swap_link, but without the write to the CFA slot. + let ret: usize; + let ret_sp: usize; + asm!( + r#" + adr x30, 0f + stp x29, x30, [sp, #-16]! + mov x1, sp + mov sp, x2 + ldp x29, x30, [sp], #16 + br x30 + 0: + "# + : "={x0}" (ret) + "={x1}" (ret_sp) + : "{x0}" (arg) + "{x2}" (new_sp.offset(0)) + :/*"x0", "x1",*/"x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "cc", "memory" + : "volatile", "alignstack"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) { + // Argument to pass to start_unwind, based on the stack base address. + let arg = unwind::unwind_arg(new_stack_base); + + // This is identical to swap_link, except that it performs a tail call to + // start_unwind instead of returning into the target context. + asm!( + r#" + adr x30, 0f + stp x29, x30, [sp, #-16]! + mov x1, sp + str x1, [x3, #-32] + mov sp, x2 + ldp x29, x30, [sp], #16 + + # Jump to the start_unwind function, which will force a stack unwind in + # the target context. This will eventually return to us through the + # stack link. + b ${0} + 0: + "# + : + : "s" (unwind::start_unwind as usize) + "{x0}" (arg) + "{x2}" (new_sp.offset(0)) + "{x3}" (new_stack_base) + : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28",/*fp,*/ "lr", /*sp,*/ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "cc", "memory" + : "volatile", "alignstack"); } diff --git a/src/arch/arm.rs b/src/arch/arm.rs new file mode 100644 index 00000000..b40c293e --- /dev/null +++ b/src/arch/arm.rs @@ -0,0 +1,292 @@ +// This file is part of libfringe, a low-level green threading library. +// Copyright (c) Nathan Zadoks , +// whitequark +// Amanieu d'Antras +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. 
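+
+// NOTE: the ARM EHABI .setfp/.save directives in the trampolines below are
+// left commented out because LLVM does not currently support EHABI directives
+// in inline assembly. src/unwind.rs compensates for this by propagating
+// panics across contexts manually on ARM (see have_cross_stack_unwind).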
+ +// To understand the machine code in this file, keep in mind these facts: +// * ARM AAPCS ABI passes the first argument in r0. We also use r0 to pass a value +// while swapping context; this is an arbitrary choice +// (we clobber all registers and could use any of them) but this allows us +// to reuse the swap function to perform the initial call. +// +// To understand the ARM EHABI CFI code in this file, keep in mind these facts: +// * CFI is "call frame information"; a set of instructions to a debugger or +// an unwinder that allow it to simulate returning from functions. This implies +// restoring every register to its pre-call state, as well as the stack pointer. +// * CFA is "call frame address"; the value of stack pointer right before the call +// instruction in the caller. Everything strictly below CFA (and inclusive until +// the next CFA) is the call frame of the callee. This implies that the return +// address is the part of callee's call frame. +// * Logically, ARM EHABI CFI is a table where rows are instruction pointer values and +// columns describe where registers are spilled (mostly using expressions that +// compute a memory location as CFA+n). A .save pseudoinstruction changes +// the state of a column for all IP numerically larger than the one it's placed +// after. A .pad or .setfp pseudoinstructions change the CFA value similarly. +// * Simulating return is as easy as restoring register values from the CFI table +// and then setting stack pointer to CFA. +// +// A high-level overview of the function of the trampolines is: +// * The 2nd init trampoline puts a controlled value (written in swap to `new_cfa`) +// into r11. This is then used as the CFA for the 1st trampoline. +// * This controlled value points to the bottom of the stack of the parent context, +// which holds the saved r11 and lr from the call to swap(). +// * The 1st init trampoline tells the unwinder to restore r11 and lr +// from the stack frame at r11 (in the parent stack), thus continuing +// unwinding at the swap call site instead of falling off the end of context stack. +use core::mem; +use arch::StackPointer; +use unwind; + +pub const STACK_ALIGNMENT: usize = 8; + +pub unsafe fn init(stack_base: *mut u8, f: unsafe fn(usize, StackPointer)) -> StackPointer { + #[cfg(not(target_vendor = "apple"))] + #[naked] + unsafe extern "C" fn trampoline_1() { + asm!( + r#" + # gdb has a hardcoded check that rejects backtraces where frame addresses + # do not monotonically decrease. It is turned off if the function is called + # "__morestack" and that is hardcoded. So, to make gdb backtraces match + # the actual unwinder behavior, we call ourselves "__morestack" and mark + # the symbol as local; it shouldn't interfere with anything. + __morestack: + .local __morestack + + # Set up the first part of our ARM EHABI CFI linking stacks together. When + # we reach this function from unwinding, r11 will be pointing at the bottom + # of the parent linked stack. This link is set each time swap() is called. + # When unwinding the frame corresponding to this function, a ARM EHABI unwinder + # will use r11+16 as the next call frame address, restore return address (lr) + # from CFA-8 and restore r11 from CFA-16. This mirrors what the second half + # of `swap_trampoline` does. + # .setfp fp, sp + # .save {fp, lr} + + # This nop is here so that the initial swap doesn't return to the start + # of the trampoline, which confuses the unwinder since it will look for + # frame information in the previous symbol rather than this one. 
It is
+      # never actually executed.
+      nop
+
+      .Lend:
+      .size __morestack, .Lend-__morestack
+      "#
+      : : : : "volatile")
+  }
+
+  #[cfg(target_vendor = "apple")]
+  #[naked]
+  unsafe extern "C" fn trampoline_1() {
+    asm!(
+      r#"
+      # Identical to the above, except avoids .local/.size that aren't available on Mach-O.
+      __morestack:
+      .private_extern __morestack
+      # .setfp fp, sp
+      # .save {fp, lr}
+      nop
+      "#
+      : : : : "volatile")
+  }
+
+  #[naked]
+  unsafe extern "C" fn trampoline_2() {
+    asm!(
+      r#"
+      # Set up the second part of our ARM EHABI CFI.
+      # When unwinding the frame corresponding to this function, an ARM EHABI
+      # unwinder will restore r11 (and thus the CFA of the first trampoline)
+      # from the stack slot. This stack slot is updated every time swap() is
+      # called to point to the bottom of the stack of the context just
+      # switched from.
+      # .setfp fp, sp
+      # .save {fp, lr}
+
+      # This nop is here so that the return address of the swap trampoline
+      # doesn't point to the start of the symbol. This confuses gdb's backtraces,
+      # causing them to think the parent function is trampoline_1 instead of
+      # trampoline_2.
+      nop
+
+      # Call unwind_wrapper with the provided function and the stack base address.
+      add r2, sp, #16
+      ldr r3, [sp, #8]
+      bl ${0}
+
+      # Restore the stack pointer of the parent context. No CFI adjustments
+      # are needed since we have the same stack frame as trampoline_1.
+      ldr sp, [sp]
+
+      # Load frame and instruction pointers of the parent context.
+      pop {fp, lr}
+
+      # If the returned value is nonzero, trigger an unwind in the parent
+      # context with the given exception object.
+      cmp r0, #0
+      bne ${1}
+
+      # Clear the stack pointer. We can't call into this context any more once
+      # the function has returned.
+      mov r1, #0
+
+      # Return into the parent context. Use `r12` instead of `lr` to avoid
+      # return address mispredictions.
+      mov r12, lr
+      bx r12
+      "#
+      :
+      : "s" (unwind::unwind_wrapper as usize)
+        "s" (unwind::start_unwind as usize)
+      : : "volatile")
+  }
+
+  // We set up the stack in a somewhat special way so that to the unwinder it
+  // looks like trampoline_1 has called trampoline_2, which has in turn called
+  // swap::trampoline.
+  //
+  // There are 2 call frames in this setup, each containing the return address
+  // followed by the r11 value for that frame. This setup supports unwinding
+  // using ARM EHABI as well as the frame pointer-based unwinding used by tools
+  // such as perf or dtrace.
+  let mut sp = StackPointer::new(stack_base);
+
+  sp.push(0 as usize); // Padding to ensure the stack is properly aligned
+  sp.push(f as usize); // Function that trampoline_2 should call
+
+  // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
+  // each time a context switch is performed.
+  sp.push(trampoline_1 as usize + 4); // Return after the nop
+  sp.push(0xdead0cfa);                // CFA slot
+
+  // Call frame for swap::trampoline. We set up the r11 value to point to the
+  // parent call frame.
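+  // The resulting layout, from the stack base downwards, is:
+  //
+  //   [ padding          ]
+  //   [ f                ]
+  //   [ trampoline_1 + 4 ]  return address of trampoline_2's frame
+  //   [ CFA slot         ]  <- frame
+  //   [ trampoline_2 + 4 ]  return address of swap's frame
+  //   [ frame            ]  <- sp (saved r11)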
+ let frame = sp.offset(0); + sp.push(trampoline_2 as usize + 4); // Entry point, skip initial nop + sp.push(frame as usize); // Pointer to parent call frame + + sp +} + +#[inline(always)] +pub unsafe fn swap_link(arg: usize, new_sp: StackPointer, + new_stack_base: *mut u8) -> (usize, Option) { + let ret: usize; + let ret_sp: usize; + asm!( + r#" + # Set up the link register + adr lr, 0f + + # Save the frame pointer and link register; the unwinder uses them to find + # the CFA of the caller, and so they have to have the correct value immediately + # after the call instruction that invoked the trampoline. + push {fp, lr} + + # Pass the stack pointer of the old context to the new one. + mov r1, sp + + # Link the call stacks together by writing the current stack bottom + # address to the CFA slot in the new stack. + str sp, [r3, #-16] + + # Load stack pointer of the new context. + mov sp, r2 + + # Load frame and instruction pointers of the new context. + pop {fp, r12} + + # Return into the new context. Use `r12` instead of `lr` to avoid + # return address mispredictions. + bx r12 + + 0: + "# + : "={r0}" (ret) + "={r1}" (ret_sp) + : "{r0}" (arg) + "{r2}" (new_sp.offset(0)) + "{r3}" (new_stack_base) + :/*r0, r1,*/ "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10",/*r11,*/"r12",/*sp,*/ "lr", /*pc,*/ + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "cc", "memory" + : "volatile"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) { + // This is identical to swap_link, but without the write to the CFA slot. + let ret: usize; + let ret_sp: usize; + asm!( + r#" + adr lr, 0f + push {fp, lr} + mov r1, sp + mov sp, r2 + pop {fp, r12} + bx r12 + 0: + "# + : "={r0}" (ret) + "={r1}" (ret_sp) + : "{r0}" (arg) + "{r2}" (new_sp.offset(0)) + :/*r0, r1,*/ "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10",/*r11,*/"r12",/*sp,*/ "lr", /*pc,*/ + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "cc", "memory" + // We need the "alignstack" attribute here to ensure that the stack is + // properly aligned if a call to start_unwind needs to be injected into + // our stack context. + : "volatile", "alignstack"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) { + // Argument to pass to start_unwind, based on the stack base address. + let arg = unwind::unwind_arg(new_stack_base); + + // This is identical to swap_link, except that it performs a tail call to + // start_unwind instead of returning into the target context. + asm!( + r#" + adr lr, 0f + push {fp, lr} + str sp, [r3, #-16] + mov sp, r2 + pop {fp, r12} + + # Jump to the start_unwind function, which will force a stack unwind in + # the target context. This will eventually return to us through the + # stack link. 
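+      # (start_unwind raises a panic carrying an UnwindMarker on the target
+      # stack; unwind_wrapper catches it there, and trampoline_2 then follows
+      # the link written above to return to label 0 below.)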
+ b ${0} + + 0: + "# + : + : "s" (unwind::start_unwind as usize) + "{r0}" (arg) + "{r2}" (new_sp.offset(0)) + "{r3}" (new_stack_base) + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10",/*r11,*/"r12",/*sp,*/ "lr", /*pc,*/ + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + "cc", "memory" + : "volatile"); +} diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 1ed3aee5..f603980b 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -7,73 +7,98 @@ // copied, modified, or distributed except according to those terms. pub use self::imp::*; +use core::ptr::NonNull; #[allow(unused_attributes)] // rust-lang/rust#35584 #[cfg_attr(target_arch = "x86", path = "x86.rs")] #[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")] #[cfg_attr(target_arch = "aarch64", path = "aarch64.rs")] +#[cfg_attr(target_arch = "arm", path = "arm.rs")] #[cfg_attr(target_arch = "or1k", path = "or1k.rs")] mod imp; +#[derive(Debug, Clone, Copy)] +pub struct StackPointer(NonNull); + +impl StackPointer { + #[inline(always)] + pub unsafe fn push(&mut self, val: usize) { + self.0 = NonNull::new_unchecked(self.0.as_ptr().offset(-1)); + *self.0.as_mut() = val; + } + + #[inline(always)] + pub unsafe fn new(sp: *mut u8) -> StackPointer { + StackPointer(NonNull::new_unchecked(sp as *mut usize)) + } + + #[inline(always)] + pub unsafe fn offset(&self, count: isize) -> *mut usize { + self.0.as_ptr().offset(count) + } +} + #[cfg(test)] mod tests { extern crate test; + #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] extern crate simd; use arch::{self, StackPointer}; - use ::OsStack; + use ::{Stack, OsStack}; #[test] fn context() { - unsafe extern "C" fn adder(arg: usize, stack_ptr: StackPointer) -> ! { + unsafe fn adder(arg: usize, stack_ptr: StackPointer) { println!("it's alive! arg: {}", arg); - let (arg, stack_ptr) = arch::swap(arg + 1, stack_ptr, None); + let (arg, stack_ptr) = arch::swap(arg + 1, stack_ptr); println!("still alive! arg: {}", arg); - arch::swap(arg + 1, stack_ptr, None); + arch::swap(arg + 1, stack_ptr); panic!("i should be dead"); } unsafe { let stack = OsStack::new(4 << 20).unwrap(); - let stack_ptr = arch::init(&stack, adder); + let stack_ptr = arch::init(stack.base(), adder); - let (ret, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack)); + let (ret, stack_ptr) = arch::swap_link(10, stack_ptr, stack.base()); assert_eq!(ret, 11); - let (ret, _) = arch::swap(50, stack_ptr, Some(&stack)); + let (ret, _) = arch::swap_link(50, stack_ptr.unwrap(), stack.base()); assert_eq!(ret, 51); } } + #[cfg(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))] #[test] fn context_simd() { - unsafe extern "C" fn permuter(arg: usize, stack_ptr: StackPointer) -> ! { + unsafe fn permuter(arg: usize, stack_ptr: StackPointer) { // This will crash if the stack is not aligned properly. let x = simd::i32x4::splat(arg as i32); let y = x * x; println!("simd result: {:?}", y); - let (_, stack_ptr) = arch::swap(0, stack_ptr, None); + let (_, stack_ptr) = arch::swap(0, stack_ptr); // And try again after a context switch. 
let x = simd::i32x4::splat(arg as i32); let y = x * x; println!("simd result: {:?}", y); - arch::swap(0, stack_ptr, None); + arch::swap(0, stack_ptr); panic!("i should be dead"); } unsafe { let stack = OsStack::new(4 << 20).unwrap(); - let stack_ptr = arch::init(&stack, permuter); + let stack_ptr = arch::init(stack.base(), permuter); - let (_, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack)); - arch::swap(20, stack_ptr, Some(&stack)); + let (_, stack_ptr) = arch::swap_link(10, stack_ptr, stack.base()); + arch::swap_link(20, stack_ptr.unwrap(), stack.base()); } } - unsafe extern "C" fn do_panic(arg: usize, stack_ptr: StackPointer) -> ! { + unsafe fn do_panic(arg: usize, stack_ptr: StackPointer) { match arg { 0 => panic!("arg=0"), 1 => { - arch::swap(0, stack_ptr, None); + arch::swap(0, stack_ptr); panic!("arg=1"); } _ => unreachable!() @@ -85,9 +110,9 @@ mod tests { fn panic_after_start() { unsafe { let stack = OsStack::new(4 << 20).unwrap(); - let stack_ptr = arch::init(&stack, do_panic); + let stack_ptr = arch::init(stack.base(), do_panic); - arch::swap(0, stack_ptr, Some(&stack)); + arch::swap_link(0, stack_ptr, stack.base()); } } @@ -96,20 +121,33 @@ mod tests { fn panic_after_swap() { unsafe { let stack = OsStack::new(4 << 20).unwrap(); - let stack_ptr = arch::init(&stack, do_panic); + let stack_ptr = arch::init(stack.base(), do_panic); + + let (_, stack_ptr) = arch::swap_link(1, stack_ptr, stack.base()); + arch::swap_link(0, stack_ptr.unwrap(), stack.base()); + } + } + + #[test] + fn ret() { + unsafe fn ret2(_: usize, _: StackPointer) {} + + unsafe { + let stack = OsStack::new(4 << 20).unwrap(); + let stack_ptr = arch::init(stack.base(), ret2); - let (_, stack_ptr) = arch::swap(1, stack_ptr, Some(&stack)); - arch::swap(0, stack_ptr, Some(&stack)); + let (_, stack_ptr) = arch::swap_link(0, stack_ptr, stack.base()); + assert!(stack_ptr.is_none()); } } #[bench] fn swap(b: &mut test::Bencher) { - unsafe extern "C" fn loopback(mut arg: usize, mut stack_ptr: StackPointer) -> ! { + unsafe fn loopback(mut arg: usize, mut stack_ptr: StackPointer) { // This deliberately does not ignore arg, to measure the time it takes // to move the return value between registers. loop { - let data = arch::swap(arg, stack_ptr, None); + let data = arch::swap(arg, stack_ptr); arg = data.0; stack_ptr = data.1; } @@ -117,10 +155,10 @@ mod tests { unsafe { let stack = OsStack::new(4 << 20).unwrap(); - let mut stack_ptr = arch::init(&stack, loopback); + let mut stack_ptr = arch::init(stack.base(), loopback); b.iter(|| for _ in 0..10 { - stack_ptr = arch::swap(0, stack_ptr, Some(&stack)).1; + stack_ptr = arch::swap_link(0, stack_ptr, stack.base()).1.unwrap(); }); } } diff --git a/src/arch/or1k.rs b/src/arch/or1k.rs index b74f2b71..b5b42aa0 100644 --- a/src/arch/or1k.rs +++ b/src/arch/or1k.rs @@ -42,14 +42,12 @@ // from the stack frame at r2 (in the parent stack), thus continuing // unwinding at the swap call site instead of falling off the end of context stack. use core::mem; -use stack::Stack; +use arch::StackPointer; +use unwind; pub const STACK_ALIGNMENT: usize = 4; -#[derive(Debug, Clone, Copy)] -pub struct StackPointer(*mut usize); - -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { +pub unsafe fn init(stack_base: *mut u8, f: unsafe fn(usize, StackPointer)) -> StackPointer { #[naked] unsafe extern "C" fn trampoline_1() { asm!( @@ -104,17 +102,38 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - # trampoline_2. 
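      # (This l.nop is only a landing pad and is never executed; the l.nop
      # after each l.jal/l.jr below is different: it fills OpenRISC's branch
      # delay slot.)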
l.nop - # Call the provided function. - l.lwz r5, 8(r1) - l.jalr r5 + # Call unwind_wrapper with the provided function and the stack base address. + l.addi r5, r1, 12 + l.lwz r6, 8(r1) + l.jal ${0} l.nop - "# - : : : : "volatile") - } - unsafe fn push(sp: &mut StackPointer, val: usize) { - sp.0 = sp.0.offset(-1); - *sp.0 = val + # Restore the stack pointer of the parent context. No CFI adjustments + # are needed since we have the same stack frame as trampoline_1. + l.lwz r1, 0(r1) + + # Load frame and instruction pointers of the parent context. + l.lwz r2, -4(r1) + l.lwz r9, -8(r1) + + # If the returned value is nonzero, trigger an unwind in the parent + # context with the given exception object. + l.or r4, r0, r11 + l.sfeq r11, r0 + l.bf ${1} + + # Clear the stack pointer. We can't call into this context any more once + # the function has returned. + l.or r4, r0, r0 + + # Return into the parent context. + l.jr r9 + l.nop + "# + : + : "s" (unwind::unwind_wrapper as usize) + "s" (unwind::start_unwind as usize) + : : "volatile") } // We set up the stack in a somewhat special way so that to the unwinder it @@ -125,20 +144,20 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - // followed by the r2 value for that frame. This setup supports unwinding // using DWARF CFI as well as the frame pointer-based unwinding used by tools // such as perf or dtrace. - let mut sp = StackPointer(stack.base() as *mut usize); + let mut sp = StackPointer::new(stack_base); - push(&mut sp, f as usize); // Function that trampoline_2 should call + sp.push(f as usize); // Function that trampoline_2 should call // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. - push(&mut sp, 0xdead0cfa); // CFA slot - push(&mut sp, trampoline_1 as usize + 4); // Return after the nop + sp.push(0xdead0cfa); // CFA slot + sp.push(trampoline_1 as usize + 4); // Return after the nop // Call frame for swap::trampoline. We set up the r2 value to point to the // parent call frame. let frame = sp; - push(&mut sp, frame.0 as usize); // Pointer to parent call frame - push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop + sp.push(frame.offset(0) as usize); // Pointer to parent call frame + sp.push(trampoline_2 as usize + 4); // Entry point, skip initial nop // The last two values are read by the swap trampoline and are actually in the // red zone and not below the stack pointer. @@ -146,17 +165,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - } #[inline(always)] -pub unsafe fn swap(arg: usize, new_sp: StackPointer, - new_stack: Option<&Stack>) -> (usize, StackPointer) { - // Address of the topmost CFA stack slot. - let mut dummy: usize = mem::uninitialized(); - let new_cfa = if let Some(new_stack) = new_stack { - (new_stack.base() as *mut usize).offset(-2) - } else { - // Just pass a dummy pointer if we aren't linking the stack - &mut dummy - }; - +pub unsafe fn swap_link(arg: usize, new_sp: StackPointer, + new_stack_base: *mut u8) -> (usize, Option) { #[naked] unsafe extern "C" fn trampoline() { asm!( @@ -172,14 +182,13 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, # Link the call stacks together by writing the current stack bottom # address to the CFA slot in the new stack. l.addi r7, r1, -8 - l.sw 0(r6), r7 + l.sw -8(r6), r7 # Pass the stack pointer of the old context to the new one. l.or r4, r0, r1 # Load stack pointer of the new context. 
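      # (r0 is hard-wired to zero on OpenRISC, so `l.or rD, r0, rS` is the
      # idiomatic register-to-register move.)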
l.or r1, r0, r5 - # Restore frame pointer and link register of the new context. # Load frame and instruction pointers of the new context. l.lwz r2, -4(r1) l.lwz r9, -8(r1) @@ -192,7 +201,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, } let ret: usize; - let ret_sp: *mut usize; + let ret_sp: usize; asm!( r#" # Call the trampoline to switch to the new context. @@ -203,13 +212,106 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, "={r4}" (ret_sp) : "s" (trampoline as usize) "{r3}" (arg) - "{r5}" (new_sp.0) - "{r6}" (new_cfa) + "{r5}" (new_sp.offset(0)) + "{r6}" (new_stack_base) + :/*"r0", "r1", "r2", "r3", "r4",*/"r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "cc", "memory" + : "volatile"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) { + // This is identical to swap_link, but without the write to the CFA slot. + #[naked] + unsafe extern "C" fn trampoline() { + asm!( + r#" + l.sw -4(r1), r2 + l.sw -8(r1), r9 + .cfi_offset r2, -4 + .cfi_offset r9, -8 + l.or r4, r0, r1 + l.or r1, r0, r5 + l.lwz r2, -4(r1) + l.lwz r9, -8(r1) + l.jr r9 + l.nop + "# + : : : : "volatile") + } + + let ret: usize; + let ret_sp: usize; + asm!( + r#" + l.jal ${2} + l.nop + "# + : "={r3}" (ret) + "={r4}" (ret_sp) + : "s" (trampoline as usize) + "{r3}" (arg) + "{r5}" (new_sp.offset(0)) :/*"r0", "r1", "r2", "r3", "r4",*/"r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", "cc", "memory" : "volatile"); - (ret, StackPointer(ret_sp)) + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) { + // Argument to pass to start_unwind, based on the stack base address. + let arg = unwind::unwind_arg(new_stack_base); + + // This is identical to swap_link, except that it performs a tail call to + // start_unwind instead of returning into the target context. + #[naked] + unsafe extern "C" fn trampoline() { + asm!( + r#" + l.sw -4(r1), r2 + l.sw -8(r1), r9 + .cfi_offset r2, -4 + .cfi_offset r9, -8 + l.addi r7, r1, -8 + l.sw -8(r6), r7 + l.or r1, r0, r5 + l.lwz r2, -4(r1) + l.lwz r9, -8(r1) + + # Jump to the start_unwind function, which will force a stack unwind in + # the target context. This will eventually return to us through the + # stack link. + l.j ${0} + l.nop + "# + : + : "s" (unwind::start_unwind as usize) + : : "volatile") + } + + asm!( + r#" + # Call the trampoline to switch to the new context. + l.jal ${0} + l.nop + "# + : + : "s" (trampoline as usize) + "{r3}" (arg) + "{r5}" (new_sp.offset(0)) + "{r6}" (new_stack_base) + :/*"r0", "r1", "r2",*/"r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "cc", "memory" + : "volatile"); } diff --git a/src/arch/x86.rs b/src/arch/x86.rs index 1543a03e..65cf30cf 100644 --- a/src/arch/x86.rs +++ b/src/arch/x86.rs @@ -42,14 +42,29 @@ // address from the stack frame at %ebp (in the parent stack), thus continuing // unwinding at the swap call site instead of falling off the end of context stack. 
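// Note that on x86 the CFA slot ends up 16 bytes below the stack base: init()
// performs four 4-byte pushes (padding, f, trampoline_1, CFA slot). This is
// why swap_link and unwind below store through -16(%ecx).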
use core::mem; -use stack::Stack; +use arch::StackPointer; +use unwind; pub const STACK_ALIGNMENT: usize = 16; -#[derive(Debug, Clone, Copy)] -pub struct StackPointer(*mut usize); +// Rust's fastcall support is currently broken due to #18086, so we use a +// custom wrapper instead. We don't quite follow the normal fastcall ABI since +// we accept the first parameter in %edi rather than the usual %ecx. +#[naked] +unsafe extern "C" fn fastcall_start_unwind() { + asm!( + r#" + subl $$12, %esp + .cfi_adjust_cfa_offset 12 + movl %edi, (%esp) + call ${0:c} + "# + : + : "s" (unwind::start_unwind as usize) + : : "volatile") +} -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { +pub unsafe fn init(stack_base: *mut u8, f: unsafe fn(usize, StackPointer)) -> StackPointer { #[cfg(not(target_vendor = "apple"))] #[naked] unsafe extern "C" fn trampoline_1() { @@ -124,18 +139,44 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - # trampoline_2. nop - # Push arguments. + # Call unwind_wrapper with the provided function and the CFA address. + leal 16(%esp), %edx + pushl 8(%esp) + pushl %edx pushl %esi pushl %edi - # Call the provided function. - calll *16(%esp) - "# - : : : : "volatile") - } + call ${0:c} + + # Restore the stack pointer of the parent context. No CFI adjustments + # are needed since we have the same stack frame as trampoline_1. + movl 16(%esp), %esp - unsafe fn push(sp: &mut StackPointer, val: usize) { - sp.0 = sp.0.offset(-1); - *sp.0 = val + # Restore frame pointer of the parent context. + popl %ebp + .cfi_adjust_cfa_offset -4 + .cfi_restore %ebp + + # If the returned value is nonzero, trigger an unwind in the parent + # context with the given exception object. + movl %eax, %edi + testl %eax, %eax + jnz ${1:c} + + # Clear the stack pointer. We can't call into this context any more once + # the function has returned. + xorl %esi, %esi + + # Return into the parent context. Use `pop` and `jmp` instead of a `ret` + # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). + popl %eax + .cfi_adjust_cfa_offset -4 + .cfi_register %eip, %eax + jmpl *%eax + "# + : + : "s" (unwind::unwind_wrapper as usize) + "s" (fastcall_start_unwind as usize) + : : "volatile") } // We set up the stack in a somewhat special way so that to the unwinder it @@ -146,39 +187,28 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - // followed by the %ebp value for that frame. This setup supports unwinding // using DWARF CFI as well as the frame pointer-based unwinding used by tools // such as perf or dtrace. - let mut sp = StackPointer(stack.base() as *mut usize); + let mut sp = StackPointer::new(stack_base); - push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned - push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned - push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned - push(&mut sp, f as usize); // Function that trampoline_2 should call + sp.push(0 as usize); // Padding to ensure the stack is properly aligned + sp.push(f as usize); // Function that trampoline_2 should call // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. 
- push(&mut sp, trampoline_1 as usize + 2); // Return after the 2 nops - push(&mut sp, 0xdead0cfa); // CFA slot + sp.push(trampoline_1 as usize + 2); // Return after the 2 nops + sp.push(0xdead0cfa); // CFA slot // Call frame for swap::trampoline. We set up the %ebp value to point to the // parent call frame. - let frame = sp; - push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop - push(&mut sp, frame.0 as usize); // Pointer to parent call frame + let frame = sp.offset(0); + sp.push(trampoline_2 as usize + 1); // Entry point, skip initial nop + sp.push(frame as usize); // Pointer to parent call frame sp } #[inline(always)] -pub unsafe fn swap(arg: usize, new_sp: StackPointer, - new_stack: Option<&Stack>) -> (usize, StackPointer) { - // Address of the topmost CFA stack slot. - let mut dummy: usize = mem::uninitialized(); - let new_cfa = if let Some(new_stack) = new_stack { - (new_stack.base() as *mut usize).offset(-6) - } else { - // Just pass a dummy pointer if we aren't linking the stack - &mut dummy - }; - +pub unsafe fn swap_link(arg: usize, new_sp: StackPointer, + new_stack_base: *mut u8) -> (usize, Option) { #[naked] unsafe extern "C" fn trampoline() { asm!( @@ -192,7 +222,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, # Link the call stacks together by writing the current stack bottom # address to the CFA slot in the new stack. - movl %esp, (%ecx) + movl %esp, -16(%ecx) # Pass the stack pointer of the old context to the new one. movl %esp, %esi @@ -215,7 +245,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, } let ret: usize; - let ret_sp: *mut usize; + let ret_sp: usize; asm!( r#" # Push instruction pointer of the old context and switch to @@ -226,12 +256,103 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, "={esi}" (ret_sp) : "s" (trampoline as usize) "{edi}" (arg) - "{edx}" (new_sp.0) - "{ecx}" (new_cfa) - : "eax", "ebx", "ecx", "edx", /*"esi", "edi", "ebp", "esp",*/ + "{edx}" (new_sp.offset(0)) + "{ecx}" (new_stack_base) + : "eax", "ebx", "ecx", "edx",/*"esi", "edi", "ebp", "esp",*/ + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "cc", "dirflag", "fpsr", "flags", "memory" + : "volatile"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) { + // This is identical to swap_link, but without the write to the CFA slot. + #[naked] + unsafe extern "C" fn trampoline() { + asm!( + r#" + pushl %ebp + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset %ebp, 0 + movl %esp, %esi + movl %edx, %esp + popl %ebp + .cfi_adjust_cfa_offset -4 + .cfi_restore %ebp + popl %eax + .cfi_adjust_cfa_offset -4 + .cfi_register %eip, %eax + jmpl *%eax + "# + : : : : "volatile") + } + + let ret: usize; + let ret_sp: usize; + asm!( + r#" + call ${2:c} + "# + : "={edi}" (ret) + "={esi}" (ret_sp) + : "s" (trampoline as usize) + "{edi}" (arg) + "{edx}" (new_sp.offset(0)) + : "eax", "ebx", "ecx", "edx",/*"esi", "edi", "ebp", "esp",*/ + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "cc", "dirflag", "fpsr", "flags", "memory" + // We need the "alignstack" attribute here to ensure that the stack is + // properly aligned if a call to start_unwind needs to be injected into + // our stack context. 
+ : "volatile", "alignstack"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) { + // Argument to pass to start_unwind, based on the stack base address. + let arg = unwind::unwind_arg(new_stack_base); + + // This is identical to swap_link, except that it performs a tail call to + // start_unwind instead of returning into the target context. + #[naked] + unsafe extern "C" fn trampoline() { + asm!( + r#" + pushl %ebp + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset %ebp, 0 + movl %esp, -16(%ecx) + movl %edx, %esp + popl %ebp + .cfi_adjust_cfa_offset -4 + .cfi_restore %ebp + + # Jump to the start_unwind function, which will force a stack unwind in + # the target context. This will eventually return to us through the + # stack link. + jmp ${0:c} + "# + : + : "s" (fastcall_start_unwind as usize) + : : "volatile") + } + + asm!( + r#" + call ${0:c} + "# + : + : "s" (trampoline as usize) + "{edi}" (arg) + "{edx}" (new_sp.offset(0)) + "{ecx}" (new_stack_base) + : "eax", "ebx", "ecx", "edx", "esi", "edi",/*"ebp", "esp",*/ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "cc", "dirflag", "fpsr", "flags", "memory" : "volatile"); - (ret, StackPointer(ret_sp)) } diff --git a/src/arch/x86_64.rs b/src/arch/x86_64.rs index 1da7dc28..c69ac61b 100644 --- a/src/arch/x86_64.rs +++ b/src/arch/x86_64.rs @@ -47,14 +47,12 @@ // address from the stack frame at %rbp (in the parent stack), thus continuing // unwinding at the swap call site instead of falling off the end of context stack. use core::mem; -use stack::Stack; +use arch::StackPointer; +use unwind; pub const STACK_ALIGNMENT: usize = 16; -#[derive(Debug, Clone, Copy)] -pub struct StackPointer(*mut usize); - -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { +pub unsafe fn init(stack_base: *mut u8, f: unsafe fn(usize, StackPointer)) -> StackPointer { #[cfg(not(target_vendor = "apple"))] #[naked] unsafe extern "C" fn trampoline_1() { @@ -129,15 +127,41 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - # trampoline_2. nop - # Call the provided function. - call *16(%rsp) - "# - : : : : "volatile") - } + # Call unwind_wrapper with the provided function and the stack base address. + leaq 32(%rsp), %rdx + movq 16(%rsp), %rcx + call ${0:c} + + # Restore the stack pointer of the parent context. No CFI adjustments + # are needed since we have the same stack frame as trampoline_1. + movq (%rsp), %rsp + + # Restore frame pointer of the parent context. + popq %rbp + .cfi_adjust_cfa_offset -8 + .cfi_restore %rbp + + # If the returned value is nonzero, trigger an unwind in the parent + # context with the given exception object. + movq %rax, %rdi + testq %rax, %rax + jnz ${1:c} - unsafe fn push(sp: &mut StackPointer, val: usize) { - sp.0 = sp.0.offset(-1); - *sp.0 = val + # Clear the stack pointer. We can't call into this context any more once + # the function has returned. + xorq %rsi, %rsi + + # Return into the parent context. Use `pop` and `jmp` instead of a `ret` + # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). 
+ popq %rax + .cfi_adjust_cfa_offset -8 + .cfi_register %rip, %rax + jmpq *%rax + "# + : + : "s" (unwind::unwind_wrapper as usize) + "s" (unwind::start_unwind as usize) + : : "volatile") } // We set up the stack in a somewhat special way so that to the unwinder it @@ -148,39 +172,30 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) - // followed by the %rbp value for that frame. This setup supports unwinding // using DWARF CFI as well as the frame pointer-based unwinding used by tools // such as perf or dtrace. - let mut sp = StackPointer(stack.base() as *mut usize); + let mut sp = StackPointer::new(stack_base); - push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned - push(&mut sp, f as usize); // Function that trampoline_2 should call + sp.push(0 as usize); // Padding to ensure the stack is properly aligned + sp.push(f as usize); // Function that trampoline_2 should call // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. - push(&mut sp, trampoline_1 as usize + 2); // Return after the 2 nops - push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot + sp.push(trampoline_1 as usize + 2); // Return after the 2 nops + sp.push(0xdeaddeaddead0cfa); // CFA slot // Call frame for swap::trampoline. We set up the %rbp value to point to the // parent call frame. - let frame = sp; - push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop - push(&mut sp, frame.0 as usize); // Pointer to parent call frame + let frame = sp.offset(0); + sp.push(trampoline_2 as usize + 1); // Entry point, skip initial nop + sp.push(frame as usize); // Pointer to parent call frame sp } #[inline(always)] -pub unsafe fn swap(arg: usize, new_sp: StackPointer, - new_stack: Option<&Stack>) -> (usize, StackPointer) { - // Address of the topmost CFA stack slot. - let mut dummy: usize = mem::uninitialized(); - let new_cfa = if let Some(new_stack) = new_stack { - (new_stack.base() as *mut usize).offset(-4) - } else { - // Just pass a dummy pointer if we aren't linking the stack - &mut dummy - }; - +pub unsafe fn swap_link(arg: usize, new_sp: StackPointer, + new_stack_base: *mut u8) -> (usize, Option) { let ret: usize; - let ret_sp: *mut usize; + let ret_sp: usize; asm!( r#" # Push the return address @@ -194,7 +209,7 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, # Link the call stacks together by writing the current stack bottom # address to the CFA slot in the new stack. - movq %rsp, (%rcx) + movq %rsp, -32(%rcx) # Pass the stack pointer of the old context to the new one. movq %rsp, %rsi @@ -215,8 +230,8 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, : "={rdi}" (ret) "={rsi}" (ret_sp) : "{rdi}" (arg) - "{rdx}" (new_sp.0) - "{rcx}" (new_cfa) + "{rdx}" (new_sp.offset(0)) + "{rcx}" (new_stack_base) : "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", @@ -231,5 +246,76 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer, // the "alignstack" LLVM inline assembly option does exactly the same // thing on x86_64. : "volatile", "alignstack"); - (ret, StackPointer(ret_sp)) + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn swap(arg: usize, new_sp: StackPointer) -> (usize, StackPointer) { + // This is identical to swap_link, but without the write to the CFA slot. 
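+  // swap is used for switches that don't need to re-link the stacks, such as
+  // a generator yielding back to its parent: Yielder::suspend in
+  // src/generator.rs calls swap, while Generator::resume calls swap_link.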
+ let ret: usize; + let ret_sp: usize; + asm!( + r#" + leaq 0f(%rip), %rax + pushq %rax + pushq %rbp + movq %rsp, %rsi + movq %rdx, %rsp + popq %rbp + popq %rax + jmpq *%rax + 0: + "# + : "={rdi}" (ret) + "={rsi}" (ret_sp) + : "{rdi}" (arg) + "{rdx}" (new_sp.offset(0)) + : "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", + "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", + "cc", "dirflag", "fpsr", "flags", "memory" + : "volatile", "alignstack"); + (ret, mem::transmute(ret_sp)) +} + +#[inline(always)] +pub unsafe fn unwind(new_sp: StackPointer, new_stack_base: *mut u8) { + // Argument to pass to start_unwind, based on the stack base address. + let arg = unwind::unwind_arg(new_stack_base); + + // This is identical to swap_link, except that it performs a tail call to + // start_unwind instead of returning into the target context. + asm!( + r#" + leaq 0f(%rip), %rax + pushq %rax + pushq %rbp + movq %rsp, -32(%rcx) + movq %rdx, %rsp + popq %rbp + + # Jump to the start_unwind function, which will force a stack unwind in + # the target context. This will eventually return to us through the + # stack link. + jmp ${0:c} + 0: + "# + : + : "s" (unwind::start_unwind as usize) + "{rdi}" (arg) + "{rdx}" (new_sp.offset(0)) + "{rcx}" (new_stack_base) + : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", /*"rbp", "rsp",*/ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", + "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", + "cc", "dirflag", "fpsr", "flags", "memory" + : "volatile", "alignstack"); } diff --git a/src/generator.rs b/src/generator.rs index e8adb8f6..207de5ad 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -19,6 +19,36 @@ use stack; use debug; use arch::{self, StackPointer}; +// Wrapper to prevent the compiler from automatically dropping a value when it +// goes out of scope. This is particularly useful when dealing with unwinding +// since mem::forget won't be executed when unwinding. +#[allow(unions_with_drop_fields)] +union NoDrop { + inner: T, +} + +// Try to pack a value into a usize if it fits, otherwise pass its address as a usize. +unsafe fn encode_usize(val: &NoDrop) -> usize { + if mem::size_of::() <= mem::size_of::() && + mem::align_of::() <= mem::align_of::() { + let mut out = 0; + ptr::copy_nonoverlapping(&val.inner, &mut out as *mut usize as *mut T, 1); + out + } else { + &val.inner as *const T as usize + } +} + +// Unpack a usize produced by encode_usize. +unsafe fn decode_usize(val: usize) -> T { + if mem::size_of::() <= mem::size_of::() && + mem::align_of::() <= mem::align_of::() { + ptr::read(&val as *const usize as *const T) + } else { + ptr::read(val as *const T) + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum State { /// Generator can be resumed. This is the initial state. 
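A note on the encoding scheme above: encode_usize packs the value directly into
the usize that swap transfers when its size and alignment fit in a usize, and
otherwise passes the value's address; decode_usize must be instantiated at the
same type to reverse it. A minimal sketch of the round trip (assuming the
NoDrop wrapper stays alive until the value is decoded, which is what the swap
protocol guarantees):

    unsafe {
        let small = NoDrop { inner: 42u8 };       // fits in a usize: packed inline
        assert_eq!(decode_usize::<u8>(encode_usize(&small)), 42);

        let big = NoDrop { inner: [1u64, 2, 3] }; // too large: passed by address
        assert_eq!(decode_usize::<[u64; 3]>(encode_usize(&big)), [1, 2, 3]);
    }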
@@ -83,18 +113,12 @@ pub enum State { /// ``` #[derive(Debug)] pub struct Generator<'a, Input: 'a, Output: 'a, Stack: stack::Stack> { - state: State, stack: NoDrop, - stack_id: NoDrop, - stack_ptr: arch::StackPointer, + stack_id: debug::StackId, + stack_ptr: Option, phantom: PhantomData<(&'a (), *mut Input, *const Output)> } -#[allow(unions_with_drop_fields)] -union NoDrop { - inner: T -} - impl ::core::fmt::Debug for NoDrop { fn fmt(&self, w: &mut ::core::fmt::Formatter) -> ::core::fmt::Result { unsafe { @@ -124,31 +148,28 @@ impl<'a, Input, Output, Stack> Generator<'a, Input, Output, Stack> /// See also the [contract](../trait.Stack.html) that needs to be fulfilled by `stack`. pub unsafe fn unsafe_new(stack: Stack, f: F) -> Generator<'a, Input, Output, Stack> where F: FnOnce(&Yielder, Input) + 'a { - unsafe extern "C" fn generator_wrapper(env: usize, stack_ptr: StackPointer) -> ! + unsafe fn generator_wrapper(env: usize, stack_ptr: StackPointer) where Stack: stack::Stack, F: FnOnce(&Yielder, Input) { // Retrieve our environment from the callee and return control to it. - let f = ptr::read(env as *const F); - let (data, stack_ptr) = arch::swap(0, stack_ptr, None); + let f: F = decode_usize(env); + let (data, stack_ptr) = arch::swap(0, stack_ptr); // See the second half of Yielder::suspend_bare. - let input = ptr::read(data as *const Input); + let input = decode_usize(data); // Run the body of the generator. let yielder = Yielder::new(stack_ptr); f(&yielder, input); - // Past this point, the generator has dropped everything it has held. - loop { yielder.suspend_bare(None); } } let stack_id = debug::StackId::register(&stack); - let stack_ptr = arch::init(&stack, generator_wrapper::); + let stack_ptr = arch::init(stack.base(), generator_wrapper::); // Transfer environment to the callee. - let stack_ptr = arch::swap(&f as *const F as usize, stack_ptr, Some(&stack)).1; - mem::forget(f); + let f = NoDrop { inner: f }; + let stack_ptr = arch::swap_link(encode_usize(&f), stack_ptr, stack.base()).1; Generator { - state: State::Runnable, stack: NoDrop { inner: stack }, - stack_id: NoDrop { inner: stack_id }, + stack_id: stack_id, stack_ptr: stack_ptr, phantom: PhantomData } @@ -158,44 +179,38 @@ impl<'a, Input, Output, Stack> Generator<'a, Input, Output, Stack> /// If the generator function has returned, returns `None`. #[inline] pub fn resume(&mut self, input: Input) -> Option { - match self.state { - State::Runnable => { - // Set the state to Unavailable. Since we have exclusive access to the generator, - // the only case where this matters is the generator function panics, after which - // it must not be invocable again. - self.state = State::Unavailable; + // Return None if we have no stack pointer (generator function already returned). + self.stack_ptr.and_then(|stack_ptr| { + // Set the state to Unavailable. Since we have exclusive access to the generator, + // the only case where this matters is the generator function panics, after which + // it must not be invocable again. + self.stack_ptr = None; - // Switch to the generator function, and retrieve the yielded value. - let val = unsafe { - let (data_out, stack_ptr) = arch::swap(&input as *const Input as usize, self.stack_ptr, Some(&self.stack.inner)); - self.stack_ptr = stack_ptr; - mem::forget(input); - ptr::read(data_out as *const Option) - }; + // Switch to the generator function, and retrieve the yielded value. 
+ unsafe { + let input = NoDrop { inner: input }; + let (data_out, stack_ptr) = arch::swap_link(encode_usize(&input), stack_ptr, self.stack.inner.base()); + self.stack_ptr = stack_ptr; - // Unless the generator function has returned, it can be switched to again, so - // set the state to Runnable. - if val.is_some() { self.state = State::Runnable } - - val + // If the generator function has finished, return None, otherwise return the + // yielded value. + stack_ptr.map(|_| decode_usize(data_out)) } - State::Unavailable => None - } + }) } /// Returns the state of the generator. #[inline] - pub fn state(&self) -> State { self.state } + pub fn state(&self) -> State { + if self.stack_ptr.is_some() { State::Runnable } else { State::Unavailable } + } /// Extracts the stack from a generator when the generator function has returned. /// If the generator function has not returned /// (i.e. `self.state() == State::Runnable`), panics. pub fn unwrap(self) -> Stack { - match self.state { - State::Runnable => { - mem::forget(self); - panic!("Argh! Bastard! Don't touch that!") - } + match self.state() { + State::Runnable => panic!("Argh! Bastard! Don't touch that!"), State::Unavailable => unsafe { self.unsafe_unwrap() } } } @@ -203,8 +218,13 @@ impl<'a, Input, Output, Stack> Generator<'a, Input, Output, Stack> /// Extracts the stack from a generator without checking if the generator function has returned. /// This will leave any pointers into the generator stack dangling, and won't run destructors. pub unsafe fn unsafe_unwrap(mut self) -> Stack { - ptr::drop_in_place(&mut self.stack_id.inner); - let stack = ptr::read(&mut self.stack.inner); + if cfg!(feature = "unwind") { + self.stack_ptr.map(|stack_ptr| arch::unwind(stack_ptr, self.stack.inner.base())); + } + + // We can't just return self.stack since Generator has a Drop impl + let stack = ptr::read(&self.stack.inner); + ptr::drop_in_place(&mut self.stack_id); mem::forget(self); stack } @@ -214,10 +234,15 @@ impl<'a, Input, Output, Stack> Drop for Generator<'a, Input, Output, Stack> where Input: 'a, Output: 'a, Stack: stack::Stack { fn drop(&mut self) { unsafe { - ptr::drop_in_place(&mut self.stack_id.inner); - match self.state { - State::Runnable => panic!("dropped unfinished Generator"), - State::Unavailable => ptr::drop_in_place(&mut self.stack.inner) + match self.stack_ptr { + Some(stack_ptr) => { + // If unwinding is not available then we have to leak the stack. + if cfg!(feature = "unwind") { + arch::unwind(stack_ptr, self.stack.inner.base()); + ptr::drop_in_place(&mut self.stack.inner); + } + } + None => ptr::drop_in_place(&mut self.stack.inner) } } } @@ -227,33 +252,43 @@ impl<'a, Input, Output, Stack> Drop for Generator<'a, Input, Output, Stack> /// returns a value. #[derive(Debug)] pub struct Yielder { - stack_ptr: Cell, + stack_ptr: Cell>, phantom: PhantomData<(*const Input, *mut Output)> } impl Yielder { fn new(stack_ptr: StackPointer) -> Yielder { Yielder { - stack_ptr: Cell::new(stack_ptr), + stack_ptr: Cell::new(Some(stack_ptr)), phantom: PhantomData } } - #[inline(always)] - fn suspend_bare(&self, val: Option) -> Input { - unsafe { - let (data, stack_ptr) = arch::swap(&val as *const Option as usize, self.stack_ptr.get(), None); - self.stack_ptr.set(stack_ptr); - mem::forget(val); - ptr::read(data as *const Input) - } - } - /// Suspends the generator and returns `Some(item)` from the `resume()` /// invocation that resumed the generator. 
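  ///
  /// Panics with "attempted to yield while unwinding" if called while the
  /// generator is being unwound, since there is no context to yield to.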
   #[inline(always)]
   pub fn suspend(&self, item: Output) -> Input {
-    self.suspend_bare(Some(item))
+    unsafe {
+      struct PanicGuard<'a>(&'a Cell<Option<StackPointer>>);
+      impl<'a> Drop for PanicGuard<'a> {
+        fn drop(&mut self) {
+          self.0.set(None);
+        }
+      }
+
+      let stack_ptr = self.stack_ptr.get().expect("attempted to yield while unwinding");
+      let item = NoDrop { inner: item };
+
+      // Use a PanicGuard to set self.stack_ptr to None if unwinding occurs. This
+      // is necessary to guarantee safety in case someone tries to yield while we
+      // are unwinding, since there is nowhere to yield to.
+      let guard = PanicGuard(&self.stack_ptr);
+      let (data, stack_ptr) = arch::swap(encode_usize(&item), stack_ptr);
+      mem::forget(guard);
+
+      self.stack_ptr.set(Some(stack_ptr));
+      decode_usize(data)
+    }
   }
 }
diff --git a/src/lib.rs b/src/lib.rs
index af4cf14b..c24dac00 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,8 @@
 // http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
 // http://opensource.org/licenses/MIT>, at your option. This file may not be
 // copied, modified, or distributed except according to those terms.
-#![feature(asm, naked_functions, cfg_target_vendor, untagged_unions)]
+
+#![feature(asm, naked_functions, cfg_target_vendor, untagged_unions, unwind_attributes)]
 #![cfg_attr(feature = "alloc", feature(alloc, heap_api, allocator_api))]
 #![cfg_attr(test, feature(test))]
 #![no_std]
@@ -43,6 +44,13 @@ pub const STACK_ALIGNMENT: usize = arch::STACK_ALIGNMENT;
 
 mod debug;
 
+#[cfg(feature = "unwind")]
+#[path = "unwind.rs"]
+mod unwind;
+#[cfg(not(feature = "unwind"))]
+#[path = "no_unwind.rs"]
+mod unwind;
+
 pub mod generator;
 mod stack;
diff --git a/src/no_unwind.rs b/src/no_unwind.rs
new file mode 100644
index 00000000..24fac7f0
--- /dev/null
+++ b/src/no_unwind.rs
@@ -0,0 +1,23 @@
+// This file is part of libfringe, a low-level green threading library.
+// Copyright (c) Amanieu d'Antras,
+// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
+// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
+// http://opensource.org/licenses/MIT>, at your option. This file may not be
+// copied, modified, or distributed except according to those terms.
+
+use arch::StackPointer;
+
+pub unsafe extern "C" fn unwind_wrapper(arg: usize, sp: StackPointer, _stack_base: *mut u8,
+                                        f: unsafe fn(usize, StackPointer)) -> usize {
+  f(arg, sp);
+  0
+}
+
+pub unsafe extern "C" fn start_unwind(_panic: usize) -> ! {
+  unreachable!();
+}
+
+#[inline]
+pub fn unwind_arg(_stack_base: *mut u8) -> usize {
+  unreachable!();
+}
diff --git a/src/stack/os/mod.rs b/src/stack/os/mod.rs
index 7c3fdd61..af6e44bb 100644
--- a/src/stack/os/mod.rs
+++ b/src/stack/os/mod.rs
@@ -29,8 +29,8 @@ impl OsStack {
   pub fn new(size: usize) -> Result<OsStack, IoError> {
     let page_size = sys::page_size();
 
-    // Stacks have to be at least one page long.
-    let len = if size == 0 { page_size } else { size };
+    // Stacks have to be at least 16 KiB to support unwinding.
+    let len = if size == 0 { 16384 } else { size };
 
     // Round the length one page size up, using the fact that the page size
     // is a power of two.
diff --git a/src/unwind.rs b/src/unwind.rs
new file mode 100644
index 00000000..0d018f3d
--- /dev/null
+++ b/src/unwind.rs
@@ -0,0 +1,81 @@
+// This file is part of libfringe, a low-level green threading library.
+// Copyright (c) Amanieu d'Antras,
+// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
+// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
+// http://opensource.org/licenses/MIT>, at your option. This file may not be
+// copied, modified, or distributed except according to those terms.
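+
+// How the unwind protocol works: to unwind a generator stack, a panic carrying
+// an `UnwindMarker` is started on that stack. `unwind_wrapper` catches anything
+// that unwinds out of the generator's root function: it swallows a marker whose
+// `stack_base` matches its own, ending the unwind there, and re-raises any other
+// payload into the parent context (directly where cross-stack unwinding works,
+// otherwise by returning the boxed exception to the assembly trampoline, which
+// hands it to `start_unwind` on the parent stack).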
+extern crate std;
+
+use self::std::panic;
+use self::std::boxed::Box;
+use core::any::Any;
+use arch::StackPointer;
+
+// Marker object that is passed through the stack during unwinding.
+struct UnwindMarker {
+  // We use the stack base as an identifier so that nested generators are handled
+  // correctly. When unwinding, we will want to continue through any number of
+  // nested generators until we reach the one with a matching identifier.
+  stack_base: *mut u8,
+}
+unsafe impl Send for UnwindMarker {}
+
+// Whether the current platform supports unwinding across multiple stacks.
+#[inline]
+fn have_cross_stack_unwind() -> bool {
+  // - Windows uses SEH for unwinding instead of libunwind. While it may be
+  //   possible to munge it to support cross-stack unwinding, we stay
+  //   conservative for now.
+  // - iOS on ARM uses setjmp/longjmp instead of DWARF-2 unwinding, and that
+  //   state would need to be explicitly saved/restored when switching contexts.
+  // - LLVM doesn't currently support ARM EHABI directives in inline assembly, so
+  //   we instead need to propagate exceptions manually across contexts.
+  !(cfg!(windows) || cfg!(target_arch = "arm"))
+}
+
+// Wrapper around the root function of a generator which handles unwinding.
+#[unwind(allowed)]
+pub unsafe extern "C" fn unwind_wrapper(arg: usize, sp: StackPointer, stack_base: *mut u8,
                                        f: unsafe fn(usize, StackPointer)) -> Option<Box<Box<Any + Send>>> {
+  // Catch any attempts to unwind out of the context.
+  match panic::catch_unwind(move || f(arg, sp)) {
+    Ok(_) => None,
+    Err(err) => {
+      // If the unwinding is due to an UnwindMarker, check whether it is intended
+      // for us by comparing its stack base with ours. If it is the same then we
+      // can swallow the exception and return to the caller normally.
+      if let Some(marker) = err.downcast_ref::<UnwindMarker>() {
+        if marker.stack_base == stack_base {
+          return None;
+        }
+      }
+
+      // Otherwise, propagate the panic to the parent context.
+      if have_cross_stack_unwind() {
+        panic::resume_unwind(err)
+      } else {
+        // The assembly code will call start_unwind in the parent context and
+        // pass it this Box as parameter.
+        Some(Box::new(err))
+      }
+    }
+  }
+}
+
+// Called by asm to start unwinding in the current context with the given
+// exception object.
+#[unwind(allowed)]
+pub unsafe extern "C" fn start_unwind(panic: Box<Box<Any + Send>>) -> ! {
+  // Use resume_unwind instead of panic! to avoid printing a message.
+  panic::resume_unwind(*panic)
+}
+
+// Get the initial argument to pass to start_unwind, keyed to the base address
+// of the generator stack that is going to be unwound.
+#[inline]
+pub fn unwind_arg(stack_base: *mut u8) -> usize {
+  let marker = UnwindMarker {
+    stack_base: stack_base
+  };
+  Box::into_raw(Box::new(Box::new(marker) as Box<Any + Send>)) as usize
+}
diff --git a/tests/generator.rs b/tests/generator.rs
index d230153a..d243c780 100644
--- a/tests/generator.rs
+++ b/tests/generator.rs
@@ -68,7 +68,7 @@ fn panic_safety() {
 
 #[test]
 fn with_slice_stack() {
-  let mut memory = [0; 1024];
+  let mut memory = [0; 16384];
   let stack = SliceStack::new(&mut memory);
   let mut add_one = unsafe { Generator::unsafe_new(stack, add_one_fn) };
   assert_eq!(add_one.resume(1), Some(2));
@@ -78,7 +78,7 @@ fn with_slice_stack() {
 
 #[test]
 fn with_owned_stack() {
-  let stack = OwnedStack::new(1024);
+  let stack = OwnedStack::new(16384);
   let mut add_one = unsafe { Generator::unsafe_new(stack, add_one_fn) };
   assert_eq!(add_one.resume(1), Some(2));
   assert_eq!(add_one.resume(2), Some(3));
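diff --git a/tests/unwind.rs b/tests/unwind.rs
new file mode 100644
index 00000000..00000000
--- /dev/null
+++ b/tests/unwind.rs
@@ -0,0 +1,36 @@
+// Illustrative sketch, not part of the original patch: the file name, the
+// `Dropper` helper, and the `fringe::{Generator, OsStack}` imports are
+// assumptions. It shows the intended effect of the new `unwind` feature:
+// dropping a generator that has not finished unwinds its stack, running
+// destructors instead of leaking the stack or panicking.
+extern crate fringe;
+
+use std::cell::Cell;
+use fringe::{Generator, OsStack};
+
+#[test]
+fn drop_runs_destructors_on_generator_stack() {
+  struct Dropper<'a>(&'a Cell<bool>);
+  impl<'a> Drop for Dropper<'a> {
+    fn drop(&mut self) {
+      self.0.set(true);
+    }
+  }
+
+  let dropped = Cell::new(false);
+  {
+    let stack = OsStack::new(16384).unwrap();
+    let mut gen = unsafe {
+      Generator::unsafe_new(stack, |yielder, ()| {
+        // `_guard` lives on the generator stack across the suspension point.
+        let _guard = Dropper(&dropped);
+        yielder.suspend(1);
+        yielder.suspend(2);
+      })
+    };
+    assert_eq!(gen.resume(()), Some(1));
+    // `gen` is dropped here while still runnable; with the `unwind` feature
+    // its stack is unwound, so `_guard`'s destructor runs.
+  }
+  assert_eq!(dropped.get(), true);
+}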