278 lines
8.1 KiB
ArmAsm
278 lines
8.1 KiB
ArmAsm
/*
|
|
__morestack
|
|
|
|
This function implements stack growth using the mechanism
|
|
devised by Ian Lance Taylor for gccgo, described here:
|
|
|
|
http://gcc.gnu.org/wiki/SplitStacks
|
|
|
|
The Rust stack is composed of a linked list of stack segments,
|
|
and each stack segment contains two parts: the work area,
|
|
where Rust functions are allowed to execute; and the red zone,
|
|
where no Rust code can execute, but where short runtime
|
|
functions (including __morestack), the dynamic linker, signal
|
|
handlers, and the unwinder can run.
|
|
|
|
Each Rust function contains an LLVM-generated prologue that
|
|
compares the stack space required for the current function to
|
|
the space remaining in the current stack segment,
|
|
maintained in a platform-specific TLS slot. The stack limit
|
|
is strategically maintained by the Rust runtime so that it is
|
|
always in place whenever a Rust function is running.
|
|
|
|
When there is not enough room to run the function, the function
|
|
prologue makes a call to __morestack to allocate a new stack
|
|
segment, copy any stack-based arguments to it, switch stacks,
|
|
then resume execution of the original function.
|
|
|
|
-- The __morestack calling convention --
|
|
|
|
For reasons of efficiency the __morestack calling convention
|
|
is bizarre. The calling function does not attempt to align the
|
|
stack for the call, and on x86_64 the arguments to __morestack
|
|
are passed in scratch registers in order to preserve the
|
|
original function's arguments.
|
|
|
|
Once __morestack has switched to the new stack, instead of
|
|
returning, it then calls into the original function, resuming
|
|
execution at the instruction following the call to
|
|
__morestack. Thus, when the original function returns it
|
|
actually returns to __morestack, which then deallocates the
|
|
stack and returns again to the original function's caller.
|
|
|
|
-- Unwinding --
|
|
|
|
All this trickery causes hell when it comes time for the
|
|
unwinder to navigate its way through this function. What
|
|
will happen is the original function will be unwound first
|
|
without any special effort, then the unwinder encounters
|
|
the __morestack frame, which is sitting just above a
|
|
tiny fraction of a frame (containing just a return pointer
|
|
and, on 32-bit, the arguments to __morestack).
|
|
|
|
We deal with this by claiming that that little bit of stack
|
|
is actually part of the __morestack frame, encoded as
|
|
DWARF call frame instructions (CFI) by .cfi assembler
|
|
pseudo-ops.
|
|
|
|
One final complication (that took me a week to figure out)
|
|
is that OS X 10.6+ uses its own 'compact unwind info',
|
|
an undocumented format generated by the linker from
|
|
the DWARF CFI. This compact unwind info doesn't correctly
|
|
capture the nuance of the __morestack frame, and as a
|
|
result all of our linking on OS X uses the -no_compact_unwind
|
|
flag.
|
|
*/
|
|
|
|
.text
|
|
|
|
// Platform-specific spellings of the symbols this file uses.
//
//   Apple  : runtime calls go through the L_...$stub jump-table entries
//            defined at the end of this file. UPCALL_NEW_STACK is
//            deliberately not defined here; on Apple the code loads its
//            address from L_upcall_new_stack$non_lazy_ptr instead.
//   Linux  : plain C symbol names.
//   other  : C symbol names with a leading underscore
//            (presumably Windows -- see the _WIN32 FIXME below).
#if defined(__APPLE__)

#define RUST_GET_TASK       L_rust_get_task$stub
#define UPCALL_DEL_STACK    L_upcall_del_stack$stub
#define UPCALL_CALL_C       L_upcall_call_shim_on_c_stack$stub
#define MORESTACK           ___morestack

#elif defined(__linux__)

#define UPCALL_NEW_STACK    upcall_new_stack
#define UPCALL_DEL_STACK    upcall_del_stack
#define RUST_GET_TASK       rust_get_task
#define UPCALL_CALL_C       upcall_call_shim_on_c_stack
#define MORESTACK           __morestack

#else

#define UPCALL_NEW_STACK    _upcall_new_stack
#define UPCALL_DEL_STACK    _upcall_del_stack
#define RUST_GET_TASK       _rust_get_task
#define UPCALL_CALL_C       _upcall_call_shim_on_c_stack
#define MORESTACK           ___morestack

#endif
|
|
|
|
// Global symbol declarations.
//
// The runtime entry points are only declared by name on non-Apple
// targets. On Apple the corresponding macros expand to local stub
// labels (or, for UPCALL_NEW_STACK, are not defined at all) and the
// real symbols are bound through .indirect_symbol entries at the end
// of this file, so declaring them here would only create stray
// globals.
//
// The shim macro is UPCALL_CALL_C; there is no UPCALL_CALL_C_STACK
// macro, so using that spelling would declare a bogus literal symbol.
#ifndef __APPLE__
    .globl UPCALL_NEW_STACK
    .globl UPCALL_DEL_STACK
    .globl RUST_GET_TASK
    .globl UPCALL_CALL_C
#endif
    .globl MORESTACK
|
|
|
|
// Keep MORESTACK out of the dynamic export table where the object
// format offers a directive for it.
// FIXME: What about _WIN32?
#if defined(__linux__)
    .hidden MORESTACK
#elif defined(__APPLE__)
    .private_extern MORESTACK
#endif

// ELF only: mark the symbol as a function.
#if defined(__ELF__)
    .type MORESTACK,@function
#endif
|
|
|
|
// MORESTACK (__morestack), i386, AT&T syntax.
//
// Entered from a function prologue, NOT through the normal C calling
// convention. On entry:
//      0(%esp)  return address into the calling prologue
//      4(%esp)  amount of stack the calling function needs
//      8(%esp)  size of the calling function's stack arguments
//     12(%esp)  return address of the calling function
//     16(%esp)  the calling function's stack arguments
// %ecx and %edx may hold fastcc arguments and must be preserved.
//
// Frame layout after "pushl %ebp; movl %esp,%ebp; subl $44,%esp"
// (offsets from %esp):
//      0  first word passed to UPCALL_CALL_C: pointer to slot 8
//      4  second word passed to UPCALL_CALL_C: upcall_new_stack
//      8  out slot, receives the new stack pointer
//     12  amount of stack needed
//     16  address of the original stack arguments
//     20  size of the original stack arguments
//     24  saved %edx
//     28  saved %ecx
//     40  (-4(%ebp)) grandparent %esp, currently unused
//     44  saved %ebp
//     48  return address into the calling prologue
//     52  argument: amount of stack needed
//     56  argument: size of stack arguments
//     64  the calling function's stack arguments
MORESTACK:
#if defined(__linux__) || defined(__APPLE__)
    .cfi_startproc
#endif

    // This base pointer setup differs from most in that we are
    // telling the unwinder to consider the Canonical Frame
    // Address (CFA) for this frame to be the value of the stack
    // pointer prior to entry to the original function, whereas
    // the CFA would typically be the value of the stack
    // pointer prior to entry to this function. This will allow
    // the unwinder to understand how to skip the tiny partial
    // frame that the original function created by calling
    // __morestack.

    // In practical terms, our CFA is 12 bytes greater than it
    // would normally be, accounting for the two arguments to
    // __morestack, and an extra return address.

    pushl   %ebp
#if defined(__linux__) || defined(__APPLE__)
    // The CFA is 20 bytes above the register that it is
    // associated with for this frame (which will be %ebp)
    .cfi_def_cfa_offset 20
    // %ebp is -20 bytes from the CFA
    .cfi_offset %ebp, -20
#endif
    movl    %esp, %ebp
#if defined(__linux__) || defined(__APPLE__)
    // Calculate the CFA as an offset from %ebp
    .cfi_def_cfa_register %ebp
#endif

    // NB: This can be called with the fastcc convention so we
    // have to preserve any argument registers

    // NB: __morestack is called misaligned by 4 bytes, i.e.
    // subl $4, %esp would get us to a normal alignment

    subl    $44, %esp

    // Save fastcc arguments
    movl    %ecx, 28(%esp)
    movl    %edx, 24(%esp)

    // FIXME (1226): main is compiled with the split-stack prologue,
    // causing it to call __morestack, so we have to jump back out
    calll   RUST_GET_TASK
    testl   %eax, %eax
    jz      .L$bail

    // Save the correct %esp value for our grandparent frame,
    // for the unwinder
    // FIXME: This isn't used
    leal    20(%ebp), %eax
    movl    %eax, -4(%ebp)

    // The arguments to upcall_new_stack

    // The size of the stack arguments to copy to the new stack,
    // one of the arguments to __morestack
    movl    56(%esp), %eax
    movl    %eax, 20(%esp)
    // The address of the stack arguments to the original function
    leal    64(%esp), %eax
    movl    %eax, 16(%esp)
    // The amount of stack needed for the original function,
    // the other argument to __morestack
    movl    52(%esp), %eax
    movl    %eax, 12(%esp)
    // Out pointer to the new stack
    movl    $0, 8(%esp)

#ifdef __APPLE__
    // Position-independent load of the upcall_new_stack address
    // through its non-lazy pointer: call/pop yields the address of
    // label 1 in %eax.
    call    1f
1:  popl    %eax
    movl    L_upcall_new_stack$non_lazy_ptr-1b(%eax), %eax
    movl    %eax, 4(%esp)
#else
    movl    $UPCALL_NEW_STACK, 4(%esp)
#endif

    // Pass the address of the argument block (slot 8) and call
    // through the C-stack shim
    leal    8(%esp), %eax
    movl    %eax, (%esp)
    call    UPCALL_CALL_C

    // Grab the __morestack return pointer
    movl    48(%esp), %eax
    // Skip past the ret instruction in the parent fn
    inc     %eax

    // Restore the fastcc arguments to the original function
    movl    28(%esp), %ecx
    movl    24(%esp), %edx

    // Switch stacks: slot 8 is the out pointer filled in above
    movl    8(%esp), %esp
    // Re-enter the function that called us
    call    *%eax

    // Now the function that called us has returned, so we need to
    // delete the old stack space

    // Switch back to the rust stack
    movl    %ebp, %esp

    // Realign stack - remember that __morestack was called misaligned
    subl    $12, %esp

    // Preserve the return value of the function we re-entered
    // (%eax, plus %edx for 64-bit results) across the call that
    // deletes the stack. The 12 bytes of realignment padding are
    // otherwise unused, so they serve as the save area.
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)

    call    UPCALL_DEL_STACK

    // Restore the preserved return value
    movl    (%esp), %eax
    movl    4(%esp), %edx

    addl    $12, %esp

    popl    %ebp

    // FIXME: I don't think these rules are necessary
    // since the unwinder should never encounter an instruction
    // pointer pointing here.
#if defined(__linux__) || defined(__APPLE__)
    // Restore the rule for how to find %ebp
    .cfi_restore %ebp
    // Tell the unwinder how to find the CFA in terms of %esp
    .cfi_def_cfa %esp, 16
#endif
    // Return to the original function's caller, popping the two
    // __morestack arguments (8 bytes)
    retl    $8

    // Bail-out path: RUST_GET_TASK returned 0, so resume the calling
    // function on the current stack without allocating a new one.
.L$bail:
    // The call to RUST_GET_TASK may have clobbered the fastcc
    // argument registers; put them back before resuming.
    movl    28(%esp), %ecx
    movl    24(%esp), %edx

    // Grab the __morestack return pointer. With the 44-byte frame it
    // lives at 48(%esp), the same slot the main path reads.
    movl    48(%esp), %eax
    // Skip past the ret instruction in the parent fn
    inc     %eax

    // Tear down our frame, then drop our return address (4 bytes)
    // and the two __morestack arguments (8 bytes)
    addl    $44, %esp
    popl    %ebp
    addl    $4+8, %esp

    jmpl    *%eax

#if defined(__linux__) || defined(__APPLE__)
    .cfi_endproc
#endif
|
|
|
|
#ifdef __APPLE__

// Mach-O indirection tables for the runtime functions used above.

// Stub body: five hlt opcodes (octal 364 == 0xf4).
// Linker will replace the hlts (the ascii) with jmp
#define JUMP_STUB_FILLER .ascii "\364\364\364\364\364"

    // Non-lazy pointer through which MORESTACK loads the address of
    // upcall_new_stack
    .section __IMPORT,__pointers,non_lazy_symbol_pointers
L_upcall_new_stack$non_lazy_ptr:
    .indirect_symbol _upcall_new_stack
    .long 0

    // Jump-table stubs, 5 bytes each, targeted by the RUST_GET_TASK,
    // UPCALL_DEL_STACK and UPCALL_CALL_C macros
    .section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5

L_rust_get_task$stub:
    .indirect_symbol _rust_get_task
    JUMP_STUB_FILLER

L_upcall_del_stack$stub:
    .indirect_symbol _upcall_del_stack
    JUMP_STUB_FILLER

L_upcall_call_shim_on_c_stack$stub:
    .indirect_symbol _upcall_call_shim_on_c_stack
    JUMP_STUB_FILLER

    .subsections_via_symbols
#endif
|
|
|