From 41def7c0cf6f848784a0d00f33abd5dc608bbfea Mon Sep 17 00:00:00 2001 From: Vladimir Sadov Date: Fri, 22 Jul 2022 07:44:21 -0700 Subject: [PATCH] [NativeAOT] Some cleanup of assembly helpers in hijack area. (#72542) * remove RhpReversePInvokeAttachOrTrapThread (dead code) * remove RhpWaitForGC * removing some dead code * remove unused extraStack parameter to PUSH_PROBE_FRAME * make all working variants of RhpGcProbeHijack to have the same shape * fix Unix build, tweak some comments * check the trap flag in RhpGcProbeHijack * add a stub for RhpGcStressHijack on Unix * save flags for windows x64 should not have rdx * not saving scratch registers on win-arm64 * PUSH_PROBE_FRAME on arm64 * couple comments * make RhpGcProbeHijack responsible for setting PTFF_SAVE_ bits * revert `RhpReversePInvokeAttachOrTrapThread2` change * fix indentation * made 32bit RhpGcStressHijack similar to 64bit counterparts (as much as can be done without trying to compile and test) * Renamed `RhpGcProbe` --> `RhpWaitForGC` --- src/coreclr/nativeaot/Runtime/ICodeManager.h | 14 +- .../nativeaot/Runtime/amd64/AsmMacros.inc | 1 - src/coreclr/nativeaot/Runtime/amd64/GcProbe.S | 100 +---- .../nativeaot/Runtime/amd64/GcProbe.asm | 404 +++--------------- .../nativeaot/Runtime/amd64/PInvoke.asm | 122 +----- src/coreclr/nativeaot/Runtime/arm/AsmMacros.h | 1 - src/coreclr/nativeaot/Runtime/arm/GcProbe.asm | 302 ++----------- src/coreclr/nativeaot/Runtime/arm/PInvoke.asm | 90 ---- .../nativeaot/Runtime/arm64/AsmMacros.h | 1 - .../nativeaot/Runtime/arm64/GcProbe.asm | 327 +++++--------- src/coreclr/nativeaot/Runtime/arm64/PInvoke.S | 122 ------ .../nativeaot/Runtime/arm64/PInvoke.asm | 127 ------ .../nativeaot/Runtime/i386/AsmMacros.inc | 2 - .../nativeaot/Runtime/i386/GcProbe.asm | 302 +------------ .../nativeaot/Runtime/i386/PInvoke.asm | 76 ---- src/coreclr/nativeaot/Runtime/portable.cpp | 10 +- src/coreclr/nativeaot/Runtime/thread.cpp | 91 +--- src/coreclr/nativeaot/Runtime/thread.h | 23 +- 
.../Runtime/unix/unixasmmacrosamd64.inc | 2 +- 19 files changed, 269 insertions(+), 1848 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index 3477ba1f932ca..3671b6f683905 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -53,10 +53,9 @@ C_ASSERT(PTFF_X1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32)); inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) { - if (returnKind == GCRK_Scalar) - return 0; - - return PTFF_SAVE_X0 | PTFF_SAVE_X1 | ((uint64_t)returnKind << 32); + // just need to report gc ref bits here. + // appropriate PTFF_SAVE_ bits will be added by the frame building routine. + return ((uint64_t)returnKind << 32); } inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) @@ -76,10 +75,9 @@ C_ASSERT(PTFF_RDX_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 16)); inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) { - if (returnKind == GCRK_Scalar) - return 0; - - return PTFF_SAVE_RAX | PTFF_SAVE_RDX | ((uint64_t)returnKind << 16); + // just need to report gc ref bits here. + // appropriate PTFF_SAVE_ bits will be added by the frame building routine. 
+ return ((uint64_t)returnKind << 16); } inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index cb1f9830eb89a..d44537b2456c1 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -396,7 +396,6 @@ EXTERN RhpGcAlloc : PROC EXTERN RhpValidateExInfoPop : PROC EXTERN RhDebugBreak : PROC EXTERN RhpWaitForGC2 : PROC -EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC EXTERN RhExceptionHandling_FailedAllocation : PROC EXTERN RhThrowHwEx : PROC EXTERN RhThrowEx : PROC diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index 810c7e35b90cd..39dcceb5234f3 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -67,24 +67,6 @@ pop rdx .endm -// -// Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -// thread if it finds it at an IP that isn`t managed code. -// -// Register state on entry: -// R11: thread pointer -// -// Register state on exit: -// R9: trashed -// -.macro ClearHijackState - xor r9, r9 - mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 - mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 - mov [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 -.endm - - // // The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and // clears the hijack state. 
@@ -98,7 +80,7 @@ // RAX, RDX preserved, other volatile regs trashed // .macro FixupHijackedCallstack - // preserve RAX, RDX as they may contain retuvalues + // preserve RAX, RDX as they may contain return values push rax push rdx @@ -109,88 +91,43 @@ pop rdx pop rax - // // Fix the stack by pushing the original return address - // mov rcx, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx + // Fetch the return address flags mov rcx, [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags] - ClearHijackState -.endm - -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpWaitForGCNoAbort -- rare path for WaitForGCCompletion -// -// -// INPUT: RDI: transition frame -// -// TRASHES: RCX, RDI, R8, R9, R10, R11 -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler - END_PROLOGUE - - mov rdx, [rdi + OFFSETOF__PInvokeTransitionFrame__m_pThread] - - test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc - jnz Done - - // passing transition frame pointer in rdi - call C_FUNC(RhpWaitForGC2) - -Done: - ret - -NESTED_END RhpWaitForGCNoAbort, _TEXT - -// -// Set the Thread state and wait for a GC to complete. 
-// -// Register state on entry: -// RBX: thread pointer -// -// Register state on exit: -// RBX: thread pointer -// All other registers trashed -// - -.macro WaitForGCCompletion - test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz LOCAL_LABEL(NoWait) - - mov rdi, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - call C_FUNC(RhpWaitForGCNoAbort) -LOCAL_LABEL(NoWait): - + // Clear hijack state + xor r9, r9 + mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 + mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 + mov [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 .endm -// -// // // GC Probe Hijack target // -// - NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler END_PROLOGUE FixupHijackedCallstack - or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RDX - jmp C_FUNC(RhpGcProbe) -NESTED_END RhpGcProbeHijack, _TEXT -NESTED_ENTRY RhpGcProbe, _TEXT, NoHandler test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads - jnz LOCAL_LABEL(RhpGcProbe_Trap) + jnz LOCAL_LABEL(WaitForGC) ret -LOCAL_LABEL(RhpGcProbe_Trap): + +LOCAL_LABEL(WaitForGC): + or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RDX + jmp C_FUNC(RhpWaitForGC) +NESTED_END RhpGcProbeHijack, _TEXT + +NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler PUSH_PROBE_FRAME r11, rax, rcx END_PROLOGUE mov rbx, r11 - WaitForGCCompletion + mov rdi, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + call C_FUNC(RhpWaitForGC2) mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT @@ -203,7 +140,7 @@ LOCAL_LABEL(Abort): pop rdx // return address as exception RIP jmp C_FUNC(RhpThrowHwEx) // Throw the ThreadAbortException as a special kind of hardware exception -NESTED_END RhpGcProbe, _TEXT +NESTED_END RhpWaitForGC, _TEXT LEAF_ENTRY RhpGcPoll, _TEXT @@ -212,7 +149,6 @@ LEAF_ENTRY RhpGcPoll, _TEXT ret 
LOCAL_LABEL(RhpGcPoll_RarePath): jmp C_FUNC(RhpGcPollRare) - LEAF_END RhpGcPoll, _TEXT NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index 2a83258629bd5..c01ada624f190 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -3,27 +3,23 @@ include AsmMacros.inc -PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH -PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - ;; -;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts the register -;; bitmask in RCX +;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts +;; the register bitmask ;; ;; On entry: -;; - BITMASK: bitmask describing pushes, may be volatile register or constant value +;; - BITMASK: bitmask describing pushes, a volatile register ;; - RAX: managed function return value, may be an object or byref ;; - preserved regs: need to stay preserved, may contain objects or byrefs -;; - extraStack bytes of stack have already been allocated ;; ;; INVARIANTS ;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. ;; - All preserved registers remain unchanged from their values in managed code. ;; -PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK +PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK push_vol_reg rax ; save RAX, it might contain an objectref - lea trashReg, [rsp + 10h + extraStack] + lea trashReg, [rsp + 10h] push_vol_reg trashReg ; save caller's RSP push_nonvol_reg r15 ; save preserved registers push_nonvol_reg r14 ; .. 
@@ -35,17 +31,17 @@ PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK push_vol_reg BITMASK ; save the register bitmask passed in by caller push_vol_reg threadReg ; Thread * (unused by stackwalker) push_nonvol_reg rbp ; save caller's RBP - mov trashReg, [rsp + 12*8 + extraStack] ; Find the return address + mov trashReg, [rsp + 12*8] ; Find the return address push_vol_reg trashReg ; save m_RIP lea trashReg, [rsp + 0] ; trashReg == address of frame ;; allocate scratch space and any required alignment - alloc_stack 20h + 10h + (extraStack AND (10h-1)) + alloc_stack 20h + 10h ;; save xmm0 in case it's being used as a return value movdqa [rsp + 20h], xmm0 - ; link the frame into the Thread + ;; link the frame into the Thread mov [threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], trashReg endm @@ -54,11 +50,9 @@ endm ;; registers and return value to their values from before the probe was called (while also updating any ;; object refs or byrefs). ;; -;; NOTE: does NOT deallocate the 'extraStack' portion of the stack, the user of this macro must do that. -;; -POP_PROBE_FRAME macro extraStack +POP_PROBE_FRAME macro movdqa xmm0, [rsp + 20h] - add rsp, 20h + 10h + (extraStack AND (10h-1)) + 8 + add rsp, 20h + 10h + 8 ; deallocate stack and discard saved m_RIP pop rbp pop rax ; discard Thread* pop rax ; discard BITMASK @@ -73,23 +67,6 @@ POP_PROBE_FRAME macro extraStack pop rax endm -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. -;; -;; Register state on entry: -;; RDX: thread pointer -;; -;; Register state on exit: -;; RCX: trashed -;; -ClearHijackState macro - xor ecx, ecx - mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx - mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx -endm - - ;; ;; The prolog for all GC suspension hijacks (normal and stress). 
Fixes up the hijacked return address, and ;; clears the hijack state. @@ -98,99 +75,96 @@ endm ;; All registers correct for return to the original return address. ;; ;; Register state on exit: -;; RCX: trashed ;; RDX: thread pointer +;; RCX: return value flags +;; RAX: preserved, other volatile regs trashed ;; FixupHijackedCallstack macro - ;; rdx <- GetThread(), TRASHES rcx INLINE_GETTHREAD rdx, rcx - ;; ;; Fix the stack by pushing the original return address - ;; mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx - ClearHijackState + ;; Fetch the return address flags + mov rcx, [rdx + OFFSETOF__Thread__m_uHijackedReturnValueFlags] + + ;; Clear hijack state + xor r9, r9 + mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 + mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 + mov [rdx + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 endm +EXTERN RhpPInvokeExceptionGuard : PROC + ;; -;; Set the Thread state and wait for a GC to complete. 
-;; -;; Register state on entry: -;; RBX: thread pointer -;; -;; Register state on exit: -;; RBX: thread pointer -;; All other registers trashed +;; GC Probe Hijack target ;; +NESTED_ENTRY RhpGcProbeHijack, _TEXT, RhpPInvokeExceptionGuard + END_PROLOGUE + FixupHijackedCallstack + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @f + ret +@@: + or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + jmp RhpWaitForGC +NESTED_END RhpGcProbeHijack, _TEXT -EXTERN RhpWaitForGCNoAbort : PROC +EXTERN RhpThrowHwEx : PROC -WaitForGCCompletion macro - test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz @F +NESTED_ENTRY RhpWaitForGC, _TEXT + PUSH_PROBE_FRAME rdx, rax, rcx + END_PROLOGUE + mov rbx, rdx mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - call RhpWaitForGCNoAbort -@@: + call RhpWaitForGC2 -endm + mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jnz Abort + POP_PROBE_FRAME + ret +Abort: + POP_PROBE_FRAME + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx ;; return address as exception RIP + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception +NESTED_END RhpWaitForGC, _TEXT -EXTERN RhpPInvokeExceptionGuard : PROC +LEAF_ENTRY RhpGcPoll, _TEXT + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne @F ; forward branch - predicted not taken + ret +@@: + jmp RhpGcPollRare +LEAF_END RhpGcPoll, _TEXT -;; -;; -;; -;; GC Probe Hijack targets -;; -;; -NESTED_ENTRY RhpGcProbeHijackScalar, _TEXT, RhpPInvokeExceptionGuard +NESTED_ENTRY RhpGcPollRare, _TEXT + PUSH_COOP_PINVOKE_FRAME rcx END_PROLOGUE - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS - jmp RhpGcProbe -NESTED_END RhpGcProbeHijackScalar, _TEXT + call RhpGcPoll2 + POP_COOP_PINVOKE_FRAME + ret +NESTED_END RhpGcPollRare, _TEXT -NESTED_ENTRY RhpGcProbeHijackObject, _TEXT, RhpPInvokeExceptionGuard - 
END_PROLOGUE - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcProbe -NESTED_END RhpGcProbeHijackObject, _TEXT -NESTED_ENTRY RhpGcProbeHijackByref, _TEXT, RhpPInvokeExceptionGuard - END_PROLOGUE - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF - jmp RhpGcProbe -NESTED_END RhpGcProbeHijackByref, _TEXT ifdef FEATURE_GC_STRESS -;; + ;; ;; GC Stress Hijack targets ;; -;; -LEAF_ENTRY RhpGcStressHijackScalar, _TEXT - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS - jmp RhpGcStressProbe -LEAF_END RhpGcStressHijackScalar, _TEXT - -LEAF_ENTRY RhpGcStressHijackObject, _TEXT - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcStressProbe -LEAF_END RhpGcStressHijackObject, _TEXT - -LEAF_ENTRY RhpGcStressHijackByref, _TEXT +LEAF_ENTRY RhpGcStressHijack, _TEXT FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcStressProbe -LEAF_END RhpGcStressHijackByref, _TEXT +LEAF_END RhpGcStressHijack, _TEXT ;; ;; Worker for our GC stress probes. Do not call directly!! @@ -206,45 +180,15 @@ LEAF_END RhpGcStressHijackByref, _TEXT ;; All other registers restored as they were when the hijack was first reached. 
;; NESTED_ENTRY RhpGcStressProbe, _TEXT - PUSH_PROBE_FRAME rdx, rax, 0, rcx + PUSH_PROBE_FRAME rdx, rax, rcx END_PROLOGUE call REDHAWKGCINTERFACE__STRESSGC - POP_PROBE_FRAME 0 + POP_PROBE_FRAME ret NESTED_END RhpGcStressProbe, _TEXT -endif ;; FEATURE_GC_STRESS - -EXTERN RhpThrowHwEx : PROC - -NESTED_ENTRY RhpGcProbe, _TEXT - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @f - ret -@@: - PUSH_PROBE_FRAME rdx, rax, 0, rcx - END_PROLOGUE - - mov rbx, rdx - WaitForGCCompletion - - mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT - jnz Abort - POP_PROBE_FRAME 0 - ret -Abort: - POP_PROBE_FRAME 0 - mov rcx, STATUS_REDHAWK_THREAD_ABORT - pop rdx ;; return address as exception RIP - jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception - -NESTED_END RhpGcProbe, _TEXT - - -ifdef FEATURE_GC_STRESS ;; PAL_LIMITED_CONTEXT, 6 xmm regs to save, 2 scratch regs to save, plus 20h bytes for scratch space RhpHijackForGcStress_FrameSize equ SIZEOF__PAL_LIMITED_CONTEXT + 6*10h + 2*8h + 20h @@ -338,200 +282,14 @@ NESTED_ENTRY RhpHijackForGcStress, _TEXT ret NESTED_END RhpHijackForGcStress, _TEXT -endif ;; FEATURE_GC_STRESS - - -;; -;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH -;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing -;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of -;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the -;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be -;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the -;; handler in the caller. 
-;; -;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to -;; complete. There are also variants for GC stress. -;; -;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to -;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack -;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. -;; -;; Register state on entry: -;; RAX: pointer to this function (i.e., trash) -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack still contains the return address. -;; -;; Register state on exit: -;; RSP: what it would be after a complete return to the caler. -;; RDX: TRASHED -;; -RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName -LEAF_ENTRY funcName, _TEXT - lea rax, [hijackFuncName] - cmp [rsp], rax - je RhpGCProbeForEHJump - -IF isStress EQ 1 - lea rax, [stressFuncName] - cmp [rsp], rax - je RhpGCStressProbeForEHJump -ENDIF - - ;; We are not hijacked, so we can return to the handler. - ;; We return to keep the call/return prediction balanced. - mov [rsp], rdx ; Update the return address - ret - -LEAF_END funcName, _TEXT -endm - -;; We need an instance of the helper for each possible hijack function. The binder has enough -;; information to determine which one we need to use for any function. 
-RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, 0, 0 -ifdef FEATURE_GC_STRESS -RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, 1, RhpGcStressHijackScalar -RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, 1, RhpGcStressHijackObject -RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, 1, RhpGcStressHijackByref -endif - -;; -;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. -;; -;; Register state on entry: -;; RAX: scratch -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack is as if we are just about to returned from the call -;; -;; Register state on exit: -;; RAX: reference to the exception object -;; RCX: scratch -;; RDX: thread pointer -;; -EHJumpProbeProlog_extraStack = 1*8 -EHJumpProbeProlog macro - push_nonvol_reg rdx ; save the handler address so we can jump to it later - mov rax, rcx ; move the ex object reference into rax so we can report it - - ;; rdx <- GetThread(), TRASHES rcx - INLINE_GETTHREAD rdx, rcx - - ;; Fix the stack by patching the original return address - mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] - mov [rsp + EHJumpProbeProlog_extraStack], rcx - - ClearHijackState - - ; TRASHES r10 - PUSH_PROBE_FRAME rdx, r10, EHJumpProbeProlog_extraStack, PROBE_SAVE_FLAGS_RAX_IS_GCREF - - END_PROLOGUE -endm - -;; -;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the -;; final jump to the handler for EH jump probe funcs. 
-;; -;; Register state on entry: -;; RAX: reference to the exception object -;; RCX: scratch -;; RDX: scratch -;; -;; Register state on exit: -;; RSP: correct for return to the caller -;; RCX: reference to the exception object -;; RDX: trashed -;; -EHJumpProbeEpilog macro - POP_PROBE_FRAME EHJumpProbeProlog_extraStack - mov rcx, rax ; Put the EX obj ref back into rcx for the handler. - - pop rax ; Recover the handler address. - mov [rsp], rax ; Update the return address - ret -endm - -;; -;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. -;; -;; Register state on entry: -;; RAX: scratch -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (rsp points to return address). -;; -;; Register state on exit: -;; RSP: correct for return to the caller -;; RBP: previous ebp frame -;; RCX: reference to the exception object -;; -NESTED_ENTRY RhpGCProbeForEHJump, _TEXT - EHJumpProbeProlog - -ifdef _DEBUG - ;; - ;; If we get here, then we have been hijacked for a real GC, and our SyncState must - ;; reflect that we've been requested to synchronize. - - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @F - - call RhDebugBreak -@@: -endif ;; _DEBUG - - mov rbx, rdx - WaitForGCCompletion - - EHJumpProbeEpilog - -NESTED_END RhpGCProbeForEHJump, _TEXT - -ifdef FEATURE_GC_STRESS -;; -;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. -;; -;; Register state on entry: -;; RAX: scratch -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (rsp points to return address). 
-;; -;; Register state on exit: -;; RSP: correct for return to the caller -;; RBP: previous ebp frame -;; RCX: reference to the exception object -;; -NESTED_ENTRY RhpGCStressProbeForEHJump, _TEXT - EHJumpProbeProlog - - call REDHAWKGCINTERFACE__STRESSGC - - EHJumpProbeEpilog - -NESTED_END RhpGCStressProbeForEHJump, _TEXT - g_pTheRuntimeInstance equ ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA EXTERN g_pTheRuntimeInstance : QWORD RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC -endif ;; FEATURE_GC_STRESS - EXTERN g_fGcStressStarted : DWORD EXTERN g_fHasFastFxsave : BYTE -FXSAVE_SIZE equ 512 - -ifdef FEATURE_GC_STRESS ;; ;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. ;; @@ -542,29 +300,9 @@ LEAF_ENTRY RhpSuppressGcStress, _TEXT ret LEAF_END RhpSuppressGcStress, _TEXT -endif ;; FEATURE_GC_STRESS - -LEAF_ENTRY RhpGcPoll, _TEXT - - cmp [RhpTrapThreads], TrapThreadsFlags_None - jne @F ; forward branch - predicted not taken - ret -@@: - jmp RhpGcPollRare - -LEAF_END RhpGcPoll, _TEXT - -NESTED_ENTRY RhpGcPollRare, _TEXT - - PUSH_COOP_PINVOKE_FRAME rcx - END_PROLOGUE - call RhpGcPoll2 - - POP_COOP_PINVOKE_FRAME +endif ;; FEATURE_GC_STRESS - ret -NESTED_END RhpGcPollRare, _TEXT end diff --git a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm index ffa74efc257d3..0b08d84484995 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm @@ -3,126 +3,6 @@ include asmmacros.inc -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -- rare path for WaitForGCCompletion -;; -;; -;; INPUT: RCX: transition frame -;; -;; TRASHES: RCX, RDX, R8, R9, R10, R11 -;; 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT - push_vol_reg rax ; don't trash the integer return value - alloc_stack 30h - movdqa [rsp + 20h], xmm0 ; don't trash the FP return value - END_PROLOGUE - - mov rdx, [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread] - - test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc - jnz Done - - ; passing transition frame pointer in rcx - call RhpWaitForGC2 - -Done: - movdqa xmm0, [rsp + 20h] - add rsp, 30h - pop rax - ret - -NESTED_END RhpWaitForGCNoAbort, _TEXT - -EXTERN RhpThrowHwEx : PROC - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -- rare path for RhpPInvokeReturn -;; -;; -;; INPUT: RCX: transition frame -;; -;; TRASHES: RCX, RDX, R8, R9, R10, R11 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -NESTED_ENTRY RhpWaitForGC, _TEXT - push_nonvol_reg rbx - END_PROLOGUE - - mov rbx, rcx - - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jz NoWait - - call RhpWaitForGCNoAbort -NoWait: - test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress - jz Done - test dword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT - jz Done - - mov rcx, STATUS_REDHAWK_THREAD_ABORT - pop rbx - pop rdx ; return address as exception RIP - jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception - -Done: - pop rbx - ret - -NESTED_END RhpWaitForGC, _TEXT - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpReversePInvokeAttachOrTrapThread -;; -;; -;; INCOMING: RAX -- address of reverse pinvoke frame -;; -;; PRESERVES: RCX, RDX, R8, R9 -- need to preserve these because the caller assumes they aren't trashed -;; -;; TRASHES: RAX, R10, 
R11 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT - alloc_stack 88h ; alloc scratch area and frame - - ; save the integer arg regs - save_reg_postrsp rcx, (20h + 0*8) - save_reg_postrsp rdx, (20h + 1*8) - save_reg_postrsp r8, (20h + 2*8) - save_reg_postrsp r9, (20h + 3*8) - - ; save the FP arg regs - save_xmm128_postrsp xmm0, (20h + 4*8 + 0*10h) - save_xmm128_postrsp xmm1, (20h + 4*8 + 1*10h) - save_xmm128_postrsp xmm2, (20h + 4*8 + 2*10h) - save_xmm128_postrsp xmm3, (20h + 4*8 + 3*10h) - - END_PROLOGUE - - mov rcx, rax ; rcx <- reverse pinvoke frame - call RhpReversePInvokeAttachOrTrapThread2 - - movdqa xmm0, [rsp + (20h + 4*8 + 0*10h)] - movdqa xmm1, [rsp + (20h + 4*8 + 1*10h)] - movdqa xmm2, [rsp + (20h + 4*8 + 2*10h)] - movdqa xmm3, [rsp + (20h + 4*8 + 3*10h)] - - mov rcx, [rsp + (20h + 0*8)] - mov rdx, [rsp + (20h + 1*8)] - mov r8, [rsp + (20h + 2*8)] - mov r9, [rsp + (20h + 3*8)] - - ;; epilog - add rsp, 88h - ret - -NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke @@ -171,7 +51,7 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT ret @@: ; passing transition frame pointer in rcx - jmp RhpWaitForGC + jmp RhpWaitForGC2 LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h index 64f3a530acfb4..96e1be63315f7 100644 --- a/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h @@ -263,7 +263,6 @@ Name SETS "|$FuncName|" EXTERN RhpGcAlloc EXTERN RhDebugBreak EXTERN RhpWaitForGC2 - EXTERN RhpReversePInvokeAttachOrTrapThread2 EXTERN RhExceptionHandling_FailedAllocation diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm index 
3e3f2c608e364..64268c10bba92 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm @@ -135,25 +135,6 @@ __PPF_ThreadReg SETS "r2" MEND -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. -;; -;; Register state on entry: -;; r2: thread pointer -;; -;; Register state on exit: -;; r12: trashed -;; - MACRO - ClearHijackState - - mov r12, #0 - str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] - str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - MEND - - ;; ;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and ;; clears the hijack state. @@ -177,32 +158,11 @@ __PPF_ThreadReg SETS "r2" ;; ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - ClearHijackState - MEND - -;; -;; Set the Thread state and wait for a GC to complete. -;; -;; Register state on entry: -;; r4: thread pointer -;; -;; Register state on exit: -;; r4: thread pointer -;; All other registers trashed -;; - - EXTERN RhpWaitForGCNoAbort - - MACRO - WaitForGCCompletion - - ldr r2, [r4, #OFFSETOF__Thread__m_ThreadStateFlags] - tst r2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC - bne %ft0 + ;; Clear hijack state + mov r12, #0 + str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - ldr r2, [r4, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - bl RhpWaitForGCNoAbort -0 MEND @@ -229,38 +189,23 @@ __PPF_ThreadReg SETS "r2" EXTERN RhpPInvokeExceptionGuard - NESTED_ENTRY RhpGcProbeHijackScalarWrapper, .text, RhpPInvokeExceptionGuard + NESTED_ENTRY RhpGcProbeHijackWrapper, .text, RhpPInvokeExceptionGuard HijackTargetFakeProlog - LABELED_RETURN_ADDRESS RhpGcProbeHijackScalar + LABELED_RETURN_ADDRESS RhpGcProbeHijack FixupHijackedCallstack - mov r12, #DEFAULT_FRAME_SAVE_FLAGS - 
b RhpGcProbe - NESTED_END RhpGcProbeHijackScalarWrapper - NESTED_ENTRY RhpGcProbeHijackObjectWrapper, .text, RhpPInvokeExceptionGuard - - HijackTargetFakeProlog - - LABELED_RETURN_ADDRESS RhpGcProbeHijackObject - - FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) - b RhpGcProbe - NESTED_END RhpGcProbeHijackObjectWrapper - - NESTED_ENTRY RhpGcProbeHijackByrefWrapper, .text, RhpPInvokeExceptionGuard - - HijackTargetFakeProlog - - LABELED_RETURN_ADDRESS RhpGcProbeHijackByref - - FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) - b RhpGcProbe - NESTED_END RhpGcProbeHijackByrefWrapper + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_TrapThreads + bne %0 + bx lr +0 + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0) + b RhpWaitForGC + NESTED_END RhpGcProbeHijackWrapper #ifdef FEATURE_GC_STRESS ;; @@ -268,28 +213,15 @@ __PPF_ThreadReg SETS "r2" ;; GC Stress Hijack targets ;; ;; - LEAF_ENTRY RhpGcStressHijackScalar - FixupHijackedCallstack - mov r12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcStressProbe - LEAF_END RhpGcStressHijackScalar - - LEAF_ENTRY RhpGcStressHijackObject + LEAF_ENTRY RhpGcStressHijack FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) + mov r12, #DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 b RhpGcStressProbe - LEAF_END RhpGcStressHijackObject - - LEAF_ENTRY RhpGcStressHijackByref - FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) - b RhpGcStressProbe - LEAF_END RhpGcStressHijackByref - + LEAF_END RhpGcStressHijack ;; ;; Worker for our GC stress probes. Do not call directly!! -;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; Instead, go through RhpGcStressHijack. ;; This worker performs the GC Stress work and returns to the original return address. 
;; ;; Register state on entry: @@ -313,21 +245,11 @@ __PPF_ThreadReg SETS "r2" EXTERN RhpThrowHwEx - LEAF_ENTRY RhpGcProbe - ldr r3, =RhpTrapThreads - ldr r3, [r3] - tst r3, #TrapThreadsFlags_TrapThreads - bne %0 - bx lr -0 - b RhpGcProbeRare - LEAF_END RhpGcProbe - - NESTED_ENTRY RhpGcProbeRare + NESTED_ENTRY RhpWaitForGC PROLOG_PROBE_FRAME r2, r3, r12 - mov r4, r2 - WaitForGCCompletion + ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] tst r2, #PTFF_THREAD_ABORT @@ -341,7 +263,7 @@ __PPF_ThreadReg SETS "r2" EPILOG_NOP mov r1, lr ;; return address as exception PC EPILOG_BRANCH RhpThrowHwEx - NESTED_END RhpGcProbe + NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll ldr r0, =RhpTrapThreads @@ -357,8 +279,8 @@ __PPF_ThreadReg SETS "r2" ; Unhijack this thread, if necessary. INLINE_THREAD_UNHIJACK r2, r0, r1 ;; trashes r0, r1 - mov r4, r2 - WaitForGCCompletion + ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 EPILOG_PROBE_FRAME NESTED_END RhpGcPollRare @@ -410,181 +332,7 @@ DREG_SZ equ (SIZEOF__PAL_LIMITED_CONTEXT - (OFFSETOF__PAL_LIMITED_CONTEXT__L NESTED_END RhpHijackForGcStress #endif ;; FEATURE_GC_STRESS - -;; -;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH -;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing -;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of -;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the -;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be -;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the -;; handler in the caller. 
-;; -;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to -;; complete. There are also variants for GC stress. -;; -;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to -;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack -;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. -;; -;; Register state on entry: -;; r0: pointer to this function (i.e., trash) -;; r1: reference to the exception object. -;; r2: handler address we want to jump to. -;; Non-volatile registers are all already correct for return to the caller. -;; LR still contains the return address. -;; -;; Register state on exit: -;; All registers except r0 and lr unchanged -;; - MACRO - RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName - - LEAF_ENTRY $funcName - ; Currently the EH epilog won't pop the return address back into LR, - ; so we have to have a funny load from [sp-4] here to retrieve it. - - ldr r0, =$hijackFuncName - cmp r0, lr - beq RhpGCProbeForEHJump - - IF $isStress - ldr r0, =$stressFuncName - cmp r0, lr - beq RhpGCStressProbeForEHJump - ENDIF - - ;; We are not hijacked, so we can return to the handler. - ;; We return to keep the call/return prediction balanced. - mov lr, r2 ; Update the return address - bx lr - LEAF_END $funcName - MEND - -;; We need an instance of the helper for each possible hijack function. The binder has enough -;; information to determine which one we need to use for any function. 
- RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, {false}, 0 - RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, {false}, 0 - RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, {false}, 0 -#ifdef FEATURE_GC_STRESS - RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, {true}, RhpGcStressHijackScalar - RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, {true}, RhpGcStressHijackObject - RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, {true}, RhpGcStressHijackByref -#endif - -;; -;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. -;; -;; Register state on entry: -;; r0: scratch -;; r1: reference to the exception object. -;; r2: handler address we want to jump to. -;; Non-volatile registers are all already correct for return to the caller. -;; The stack is as if we are just about to returned from the call -;; -;; Register state on exit: -;; r0: reference to the exception object -;; r2: thread pointer -;; - MACRO - EHJumpProbeProlog - - PROLOG_PUSH {r1,r2} ; save the handler address so we can jump to it later (save r1 just for alignment) - PROLOG_NOP mov r0, r1 ; move the ex object reference into r0 so we can report it - ALLOC_PROBE_FRAME - - ;; r2 <- GetThread(), TRASHES r1 - INLINE_GETTHREAD r2, r1 - - ;; Recover the original return address and update the frame - ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP] - - ;; ClearHijackState expects thread in r2 (trashes r12). - ClearHijackState - - ; TRASHES r1 - INIT_PROBE_FRAME r2, r1, #PROBE_SAVE_FLAGS_R0_IS_GCREF, (PROBE_FRAME_SIZE + 8) - str sp, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - MEND - -;; -;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the -;; final jump to the handler for EH jump probe funcs. 
-;; -;; Register state on entry: -;; r0: reference to the exception object -;; r1-r3: scratch -;; -;; Register state on exit: -;; sp: correct for return to the caller -;; r1: reference to the exception object -;; - MACRO - EHJumpProbeEpilog - - FREE_PROBE_FRAME ; This restores exception object back into r0 - EPILOG_NOP mov r1, r0 ; Move the Exception object back into r1 where the catch handler expects it - EPILOG_POP {r0,pc} ; Recover the handler address and jump to it - MEND - -;; -;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete. -;; -;; Register state on entry: -;; r0: reference to the exception object. -;; r2: thread -;; Non-volatile registers are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (lr points to return address). -;; -;; Register state on exit: -;; r7: previous frame pointer -;; r0: reference to the exception object -;; - NESTED_ENTRY RhpGCProbeForEHJump - EHJumpProbeProlog - -#ifdef _DEBUG - ;; - ;; If we get here, then we have been hijacked for a real GC, and our SyncState must - ;; reflect that we've been requested to synchronize. - - ldr r1, =RhpTrapThreads - ldr r1, [r1] - tst r1, #TrapThreadsFlags_TrapThreads - bne %0 - - bl RhDebugBreak -0 -#endif ;; _DEBUG - - mov r4, r2 - WaitForGCCompletion - - EHJumpProbeEpilog - NESTED_END RhpGCProbeForEHJump - #ifdef FEATURE_GC_STRESS -;; -;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. -;; -;; Register state on entry: -;; r1: reference to the exception object. -;; r2: thread -;; Non-volatile registers are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (lr points to return address). 
-;; -;; Register state on exit: -;; r7: previous frame pointer -;; r0: reference to the exception object -;; - NESTED_ENTRY RhpGCStressProbeForEHJump - EHJumpProbeProlog - - bl $REDHAWKGCINTERFACE__STRESSGC - - EHJumpProbeEpilog - NESTED_END RhpGCStressProbeForEHJump ;; ;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. diff --git a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm index e1cbe46c77813..234b50c117bad 100644 --- a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm @@ -5,94 +5,4 @@ TEXTAREA -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -;; -;; -;; INPUT: r2: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGCNoAbort - - PROLOG_PUSH {r0-r6,lr} ; Even number of registers to maintain 8-byte stack alignment - PROLOG_VPUSH {d0-d3} ; Save float return value registers as well - - ldr r5, [r2, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - - ldr r0, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] - tst r0, #TSF_DoNotTriggerGc - bne Done - - mov r0, r2 ; passing transition frame in r0 - bl RhpWaitForGC2 - -Done - EPILOG_VPOP {d0-d3} - EPILOG_POP {r0-r6,pc} - - NESTED_END RhpWaitForGCNoAbort - - EXTERN RhpThrowHwEx - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -;; -;; -;; INPUT: r2: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGC - PROLOG_PUSH {r0,lr} - - ldr r0, =RhpTrapThreads - ldr r0, [r0] - tst r0, #TrapThreadsFlags_TrapThreads - beq NoWait - bl RhpWaitForGCNoAbort -NoWait - tst r0, 
#TrapThreadsFlags_AbortInProgress - beq NoAbort - ldr r0, [r2, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - tst r0, #PTFF_THREAD_ABORT - beq NoAbort - EPILOG_POP {r0,r1} ; hijack target address as exception PC - EPILOG_NOP mov r0, #STATUS_REDHAWK_THREAD_ABORT - EPILOG_BRANCH RhpThrowHwEx -NoAbort - EPILOG_POP {r0,pc} - NESTED_END RhpWaitForGC - INLINE_GETTHREAD_CONSTANT_POOL - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke -;; -;; -;; INPUT: r4: address of reverse pinvoke frame -;; -;; TRASHES: none -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread - - PROLOG_PUSH {r0-r4,lr} ; Need to save argument registers r0-r3 and lr, r4 is just for alignment - PROLOG_VPUSH {d0-d7} ; Save float argument registers as well since they're volatile - - mov r0, r4 ; passing reverse pinvoke frame pointer in r0 - bl RhpReversePInvokeAttachOrTrapThread2 - - EPILOG_VPOP {d0-d7} - EPILOG_POP {r0-r4,pc} - - NESTED_END RhpReversePInvokeTrapThread - - - end diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 0cea6597d1047..d092f638be292 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -97,7 +97,6 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbA EXTERN RhExceptionHandling_FailedAllocation EXTERN RhDebugBreak EXTERN RhpWaitForGC2 - EXTERN RhpReversePInvokeAttachOrTrapThread2 EXTERN RhThrowHwEx EXTERN RhThrowEx EXTERN RhRethrow diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index b232e380fb075..e5c2f5a4eebe7 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ 
b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -8,41 +8,32 @@ EXTERN RhpGcPoll2 EXTERN g_fGcStressStarted -PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH + PTFF_SAVE_LR - ;; Build a map of symbols representing offsets into the transition frame (see PInvokeTransitionFrame in ;; rhbinder.h) and keep these two in sync. map 0 field OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs field 10 * 8 ; x19..x28 m_CallersSP field 8 ; SP at routine entry - field 19 * 8 ; x0..x18 - field 8 ; lr -m_SavedNZCV field 8 ; Saved condition flags + field 2 * 8 ; x0..x1 + field 8 ; alignment padding field 4 * 8 ; d0..d3 PROBE_FRAME_SIZE field 0 - ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very - ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the - ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and - ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME - ;; defined below. For the special cases where additional work has to be done in the prolog we also provide - ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control - ;; to be asserted. + ;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers + ;; and accepts the register bitmask + ;; Call this macro first in the method (no further prolog instructions can be added after this). ;; - ;; Note that we currently employ a significant simplification of frame setup: we always allocate a - ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can - ;; lead to up to 20 additional register saves (x0-x18, lr) or 160 bytes of stack space. 
I have done no - ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the - ;; additional saves will show any measurable degradation. - - ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro - ;; can only be called from within the prolog). + ;; $threadReg : register containing the Thread* (this will be preserved). + ;; $trashReg : register that can be trashed by this macro + ;; $BITMASK : value to initialize m_Flags field with (register or #constant) MACRO - ALLOC_PROBE_FRAME $extraStackSpace, $saveFPRegisters + PUSH_PROBE_FRAME $threadReg, $trashReg, $BITMASK + + ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + ; incoming register values into it. ;; First create PInvokeTransitionFrame - PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE + $extraStackSpace)! ;; Push down stack pointer and store FP and LR + PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE)!
;; Push down stack pointer and store FP and LR ;; Slot at [sp, #0x10] is reserved for Thread * ;; Slot at [sp, #0x18] is reserved for bitmask of saved registers @@ -56,52 +47,42 @@ PROBE_FRAME_SIZE field 0 ;; Slot at [sp, #0x70] is reserved for caller sp - ;; Save the scratch registers + ;; Save the integer return registers PROLOG_NOP str x0, [sp, #0x78] - PROLOG_NOP stp x1, x2, [sp, #0x80] - PROLOG_NOP stp x3, x4, [sp, #0x90] - PROLOG_NOP stp x5, x6, [sp, #0xA0] - PROLOG_NOP stp x7, x8, [sp, #0xB0] - PROLOG_NOP stp x9, x10, [sp, #0xC0] - PROLOG_NOP stp x11, x12, [sp, #0xD0] - PROLOG_NOP stp x13, x14, [sp, #0xE0] - PROLOG_NOP stp x15, x16, [sp, #0xF0] - PROLOG_NOP stp x17, x18, [sp, #0x100] - PROLOG_NOP str lr, [sp, #0x110] - - ;; Slot at [sp, #0x118] is reserved for NZCV + PROLOG_NOP str x1, [sp, #0x80] + + ;; Slot at [sp, #0x88] is alignment padding ;; Save the floating return registers - IF $saveFPRegisters - PROLOG_NOP stp d0, d1, [sp, #0x120] - PROLOG_NOP stp d2, d3, [sp, #0x130] - ENDIF + PROLOG_NOP stp d0, d1, [sp, #0x90] + PROLOG_NOP stp d2, d3, [sp, #0xA0] + + ;; Perform the rest of the PInvokeTransitionFrame initialization. + str $BITMASK, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] ; save the register bitmask passed in by caller + str $threadReg,[sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * (unused by stackwalker) + add $trashReg, sp, #PROBE_FRAME_SIZE ; recover value of caller's SP + str $trashReg, [sp, #m_CallersSP] ; save caller's SP + ;; link the frame into the Thread + mov $trashReg, sp + str $trashReg, [$threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] MEND - ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all - ;; registers are restored (apart for sp and pc), even volatiles. 
+;; +;; Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +;; registers and return value to their values from before the probe was called (while also updating any +;; object refs or byrefs). +;; MACRO - FREE_PROBE_FRAME $extraStackSpace, $restoreFPRegisters + POP_PROBE_FRAME - ;; Restore the scratch registers + ;; Restore the integer return registers PROLOG_NOP ldr x0, [sp, #0x78] - PROLOG_NOP ldp x1, x2, [sp, #0x80] - PROLOG_NOP ldp x3, x4, [sp, #0x90] - PROLOG_NOP ldp x5, x6, [sp, #0xA0] - PROLOG_NOP ldp x7, x8, [sp, #0xB0] - PROLOG_NOP ldp x9, x10, [sp, #0xC0] - PROLOG_NOP ldp x11, x12, [sp, #0xD0] - PROLOG_NOP ldp x13, x14, [sp, #0xE0] - PROLOG_NOP ldp x15, x16, [sp, #0xF0] - PROLOG_NOP ldp x17, x18, [sp, #0x100] - PROLOG_NOP ldr lr, [sp, #0x110] + PROLOG_NOP ldr x1, [sp, #0x80] ; Restore the floating return registers - IF $restoreFPRegisters - EPILOG_NOP ldp d0, d1, [sp, #0x120] - EPILOG_NOP ldp d2, d3, [sp, #0x130] - ENDIF + EPILOG_NOP ldp d0, d1, [sp, #0x90] + EPILOG_NOP ldp d2, d3, [sp, #0xA0] ;; Restore callee saved registers EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 @@ -110,100 +91,7 @@ PROBE_FRAME_SIZE field 0 EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 - EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE + $extraStackSpace)! - MEND - - ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can - ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP - ;; is invariant outside of the prolog. 
- ;; - ;; $threadReg : register containing the Thread* (this will be preserved) - ;; $trashReg : register that can be trashed by this macro - ;; $savedRegsMask : value to initialize m_Flags field with (register or #constant) - ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant - ;; $frameSize : total size of the method's stack frame (including probe frame size) - MACRO - INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags, $frameSize - - LCLS BitmaskStr -BitmaskStr SETS "$savedRegsMask" - - str $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * - IF BitmaskStr:LEFT:1 == "#" - ;; The savedRegsMask is a constant, remove the leading "#" since the MOVL64 doesn't expect it -BitmaskStr SETS BitmaskStr:RIGHT:(:LEN:BitmaskStr - 1) - MOVL64 $trashReg, $BitmaskStr, $gcFlags - ELSE - ASSERT "$gcFlags" == "" - ;; The savedRegsMask is a register - mov $trashReg, $savedRegsMask - ENDIF - str $trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - add $trashReg, sp, #$frameSize - str $trashReg, [sp, #m_CallersSP] - MEND - - ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro - ;; first in the method (no further prolog instructions can be added after this). - ;; - ;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then - ;; the current thread will be calculated inline into r2 ($trashReg must not equal r2 in - ;; this case) - ;; $trashReg : register that can be trashed by this macro - ;; $savedRegsMask : value to initialize m_dwFlags field with (register or #constant) - ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant - MACRO - PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags - - ; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value - ; of $threadReg. 
- LCLS __PPF_ThreadReg -__PPF_ThreadReg SETS "$threadReg" - - ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving - ; incoming register values into it. - ALLOC_PROBE_FRAME 0, {true} - - ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into x2. - ; Record that x2 holds the Thread* in our local variable. - IF "$threadReg" == "" - ASSERT "$trashReg" != "x2" -__PPF_ThreadReg SETS "x2" - INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg - ENDIF - - ; Perform the rest of the PInvokeTransitionFrame initialization. - INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $savedRegsMask, $gcFlags, PROBE_FRAME_SIZE - mov $trashReg, sp - str $trashReg, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - MEND - - ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and - ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR. - MACRO - EPILOG_PROBE_FRAME - - FREE_PROBE_FRAME 0, {true} - EPILOG_RETURN - MEND - -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. -;; -;; Register state on entry: -;; x2: thread pointer -;; -;; Register state on exit: -;; - MACRO - ClearHijackState - - ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) - ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress - stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] - ;; Clear m_uHijackedReturnValueFlags - str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] + EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE)! 
MEND ;; @@ -231,32 +119,15 @@ __PPF_ThreadReg SETS "x2" ;; Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags ldp lr, x12, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - ClearHijackState - MEND - -;; -;; Set the Thread state and wait for a GC to complete. -;; -;; Register state on entry: -;; x4: thread pointer -;; -;; Register state on exit: -;; x4: thread pointer -;; All other registers trashed -;; - - EXTERN RhpWaitForGCNoAbort - - MACRO - WaitForGCCompletion - - ldr w2, [x4, #OFFSETOF__Thread__m_ThreadStateFlags] - tst w2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC - bne %ft0 + ;; + ;; Clear hijack state + ;; + ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) + ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + ;; Clear m_uHijackedReturnValueFlags + str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] - ldr x9, [x4, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - bl RhpWaitForGCNoAbort -0 MEND MACRO @@ -273,10 +144,7 @@ __PPF_ThreadReg SETS "x2" MEND ;; -;; -;; -;; GC Probe Hijack targets -;; +;; GC Probe Hijack target ;; EXTERN RhpPInvokeExceptionGuard @@ -284,73 +152,37 @@ __PPF_ThreadReg SETS "x2" HijackTargetFakeProlog LABELED_RETURN_ADDRESS RhpGcProbeHijack - FixupHijackedCallstack - orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcProbe - NESTED_END RhpGcProbeHijackWrapper - -#ifdef FEATURE_GC_STRESS -;; -;; -;; GC Stress Hijack targets -;; -;; - LEAF_ENTRY RhpGcStressHijack - FixupHijackedCallstack - orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcStressProbe - LEAF_END RhpGcStressHijack -;; -;; Worker for our GC stress probes. Do not call directly!! -;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. -;; This worker performs the GC Stress work and returns to the original return address. 
-;; -;; Register state on entry: -;; x0: hijacked function return value -;; x1: hijacked function return value -;; x2: thread pointer -;; w12: register bitmask -;; -;; Register state on exit: -;; Scratch registers, except for x0, have been trashed -;; All other registers restored as they were when the hijack was first reached. -;; - NESTED_ENTRY RhpGcStressProbe - PROLOG_PROBE_FRAME x2, x3, x12, - - bl $REDHAWKGCINTERFACE__STRESSGC - EPILOG_PROBE_FRAME - NESTED_END RhpGcStressProbe -#endif ;; FEATURE_GC_STRESS - - LEAF_ENTRY RhpGcProbe ldr x3, =RhpTrapThreads ldr w3, [x3] - tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, RhpGcProbeRare + tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, WaitForGC ret - LEAF_END RhpGcProbe + +WaitForGC + orr x12, x12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0 + PTFF_SAVE_X1) + b RhpWaitForGC + NESTED_END RhpGcProbeHijackWrapper EXTERN RhpThrowHwEx - NESTED_ENTRY RhpGcProbeRare - PROLOG_PROBE_FRAME x2, x3, x12, + NESTED_ENTRY RhpWaitForGC + PUSH_PROBE_FRAME x2, x3, x12 - mov x4, x2 - WaitForGCCompletion + ldr x0, [x2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 ldr x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1 - EPILOG_PROBE_FRAME - + POP_PROBE_FRAME + EPILOG_RETURN 1 - FREE_PROBE_FRAME 0, {true} + POP_PROBE_FRAME EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT EPILOG_NOP mov x1, lr ;; return address as exception PC EPILOG_NOP b RhpThrowHwEx - NESTED_END RhpGcProbeRare + NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll ldr x0, =RhpTrapThreads @@ -366,7 +198,42 @@ __PPF_ThreadReg SETS "x2" ret NESTED_END RhpGcPollRare + #ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack target +;; +;; + LEAF_ENTRY RhpGcStressHijack + FixupHijackedCallstack + orr x12, x12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0 + PTFF_SAVE_X1) + b RhpGcStressProbe + LEAF_END RhpGcStressHijack +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack. 
+;; This worker performs the GC Stress work and returns to the original return address. +;; +;; Register state on entry: +;; x0: hijacked function return value +;; x1: hijacked function return value +;; x2: thread pointer +;; w12: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for x0, have been trashed +;; All other registers restored as they were when the hijack was first reached. +;; + NESTED_ENTRY RhpGcStressProbe + PUSH_PROBE_FRAME x2, x3, x12 + + bl $REDHAWKGCINTERFACE__STRESSGC + + POP_PROBE_FRAME + EPILOG_RETURN + NESTED_END RhpGcStressProbe + NESTED_ENTRY RhpHijackForGcStress ;; This function should be called from right before epilog diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S index 3a24634ff0383..8ed8a497d4abf 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S @@ -18,128 +18,6 @@ TSF_Attached_Bit = 0 TSF_SuppressGcStress_Bit = 3 TSF_DoNotTriggerGc_Bit = 4 -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpWaitForGCNoAbort -// -// -// INPUT: x9: transition frame -// -// TRASHES: None -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler - - // FP and LR registers - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x40 // Push down stack pointer and store FP and LR - - // Save the integer return registers, as well as the floating return registers - stp x0, x1, [sp, #0x10] - stp d0, d1, [sp, #0x20] - stp d2, d3, [sp, #0x30] - - ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] - tbnz x0, #TSF_DoNotTriggerGc_Bit, Done - - mov x0, x9 // passing transition frame in x0 - bl RhpWaitForGC2 - -Done: - ldp x0, x1, [sp, #0x10] - ldp d0, d1, [sp, #0x20] - ldp d2, d3, [sp, #0x30] - 
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x40 - EPILOG_RETURN - - NESTED_END RhpWaitForGCNoAbort - -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpWaitForGC -// -// -// INPUT: x9: transition frame -// -// TRASHES: x0, x1, x10 -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler - - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x10 - - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 10 - - tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait - bl RhpWaitForGCNoAbort -NoWait: - tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort - ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort - - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x10 - mov w0, #STATUS_REDHAWK_THREAD_ABORT - mov x1, lr // hijack target address as exception PC - b RhpThrowHwEx - -NoAbort: - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x10 - EPILOG_RETURN - - NESTED_END RhpWaitForGC, _TEXT - -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke -// -// -// INPUT: x9: address of reverse pinvoke frame -// -// PRESERVES: x0-x8 -- need to preserve these because the caller assumes they are not trashed -// -// TRASHES: none -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT, NoHandler - - // FP and LR registers - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0xA0 // Push down stack pointer and store FP and LR - - // Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) - stp x0, x1, [sp, #0x10] - stp x2, x3, [sp, #0x20] - stp x4, x5, [sp, #0x30] - stp x6, x7, [sp, #0x40] - stp x8, x8, [sp, #0x50] - - // Save 
float argument registers as well since they are volatile - stp d0, d1, [sp, #0x60] - stp d2, d3, [sp, #0x70] - stp d4, d5, [sp, #0x80] - stp d6, d7, [sp, #0x90] - - mov x0, x9 // passing reverse pinvoke frame pointer in x0 - bl RhpReversePInvokeAttachOrTrapThread2 - - // Restore floating point registers - ldp d0, d1, [sp, #0x60] - ldp d2, d3, [sp, #0x70] - ldp d4, d5, [sp, #0x80] - ldp d6, d7, [sp, #0x90] - - // Restore the argument registers - ldp x0, x1, [sp, #0x10] - ldp x2, x3, [sp, #0x20] - ldp x4, x5, [sp, #0x30] - ldp x6, x7, [sp, #0x40] - ldr x8, [sp, #0x50] - - // Restore FP and LR registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0xA0 - EPILOG_RETURN - - NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT - // // RhpPInvoke // diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm index 540df990c853c..475737fea71a0 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm @@ -5,133 +5,6 @@ TEXTAREA -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -;; -;; -;; INPUT: x9: transition frame -;; -;; TRASHES: None -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGCNoAbort - - ;; FP and LR registers - PROLOG_SAVE_REG_PAIR fp, lr, #-0x40! 
;; Push down stack pointer and store FP and LR - - ;; Save the integer return registers, as well as the floating return registers - stp x0, x1, [sp, #0x10] - stp d0, d1, [sp, #0x20] - stp d2, d3, [sp, #0x30] - - ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] - tbnz x0, #TSF_DoNotTriggerGc_Bit, Done - - mov x0, x9 ; passing transition frame in x0 - bl RhpWaitForGC2 - -Done - ldp x0, x1, [sp, #0x10] - ldp d0, d1, [sp, #0x20] - ldp d2, d3, [sp, #0x30] - EPILOG_RESTORE_REG_PAIR fp, lr, #0x40! - EPILOG_RETURN - - NESTED_END RhpWaitForGCNoAbort - - EXTERN RhpThrowHwEx - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -;; -;; -;; INPUT: x9: transition frame -;; -;; TRASHES: x0, x1, x10 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGC - - PROLOG_SAVE_REG_PAIR fp, lr, #-0x10! - - ldr x10, =RhpTrapThreads - ldr w10, [x10] - tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait - bl RhpWaitForGCNoAbort -NoWait - tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort - ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort - - EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! - EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT - EPILOG_NOP mov x1, lr ; hijack target address as exception PC - EPILOG_NOP b RhpThrowHwEx - -NoAbort - EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! 
- EPILOG_RETURN - - NESTED_END RhpWaitForGC - - INLINE_GETTHREAD_CONSTANT_POOL - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke -;; -;; -;; INPUT: x9: address of reverse pinvoke frame -;; -;; PRESERVES: x0-x8 -- need to preserve these because the caller assumes they aren't trashed -;; -;; TRASHES: none -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread - - ;; FP and LR registers - PROLOG_SAVE_REG_PAIR fp, lr, #-0xA0! ;; Push down stack pointer and store FP and LR - - ;; Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) - stp x0, x1, [sp, #0x10] - stp x2, x3, [sp, #0x20] - stp x4, x5, [sp, #0x30] - stp x6, x7, [sp, #0x40] - stp x8, x8, [sp, #0x50] - - ;; Save float argument registers as well since they're volatile - stp d0, d1, [sp, #0x60] - stp d2, d3, [sp, #0x70] - stp d4, d5, [sp, #0x80] - stp d6, d7, [sp, #0x90] - - mov x0, x9 ; passing reverse pinvoke frame pointer in x0 - bl RhpReversePInvokeAttachOrTrapThread2 - - ;; Restore floating point registers - ldp d0, d1, [sp, #0x60] - ldp d2, d3, [sp, #0x70] - ldp d4, d5, [sp, #0x80] - ldp d6, d7, [sp, #0x90] - - ;; Restore the argument registers - ldp x0, x1, [sp, #0x10] - ldp x2, x3, [sp, #0x20] - ldp x4, x5, [sp, #0x30] - ldp x6, x7, [sp, #0x40] - ldr x8, [sp, #0x50] - - ;; Restore FP and LR registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR fp, lr, #0xA0! 
- EPILOG_RETURN - - NESTED_END RhpReversePInvokeAttachOrTrapThread - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc index 1bb6d2d34bba9..495a318c7bf92 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc @@ -180,7 +180,6 @@ G_EPHEMERAL_LOW equ _g_ephemeral_low G_EPHEMERAL_HIGH equ _g_ephemeral_high G_CARD_TABLE equ _g_card_table RhpWaitForGC2 equ @RhpWaitForGC2@4 -RhpReversePInvokeAttachOrTrapThread2 equ @RhpReversePInvokeAttachOrTrapThread2@4 RhpTrapThreads equ _RhpTrapThreads ifdef FEATURE_GC_STRESS @@ -194,7 +193,6 @@ endif ;; FEATURE_GC_STRESS EXTERN RhpGcAlloc : PROC EXTERN RhDebugBreak : PROC EXTERN RhpWaitForGC2 : PROC -EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC EXTERN RhExceptionHandling_FailedAllocation : PROC EXTERN RhThrowHwEx : PROC EXTERN RhThrowEx : PROC diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm index c27c6645a2c6a..22d7cda1ef285 100644 --- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm @@ -13,20 +13,6 @@ include AsmMacros.inc DEFAULT_PROBE_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. 
-;; -;; Register state on entry: -;; EDX: thread pointer -;; -;; Register state on exit: -;; No changes -;; -ClearHijackState macro - mov dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 - mov dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 -endm ;; ;; The prolog for all GC suspension hijackes (normal and stress). Sets up an EBP frame, @@ -58,7 +44,12 @@ HijackFixupProlog macro mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] mov [ebp + 4], ecx - ClearHijackState + ;; + ;; Clear hijack state + ;; + mov dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + endm ;; @@ -145,33 +136,6 @@ PopProbeFrame macro pop eax endm -;; -;; Set the Thread state and wait for a GC to complete. -;; -;; Register state on entry: -;; ESP: pointer to a PInvokeTransitionFrame on the stack -;; EBX: thread pointer -;; EBP: EBP frame -;; -;; Register state on exit: -;; ESP: pointer to a PInvokeTransitionFrame on the stack -;; EBX: thread pointer -;; EBP: EBP frame -;; All other registers trashed -;; - -EXTERN _RhpWaitForGCNoAbort : PROC - -WaitForGCCompletion macro - test dword ptr [ebx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz @F - - mov ecx, esp - call _RhpWaitForGCNoAbort -@@: - -endm - RhpThrowHwEx equ @RhpThrowHwEx@0 extern RhpThrowHwEx : proc @@ -189,16 +153,11 @@ extern RhpThrowHwEx : proc ;; Register state on exit: ;; All registers restored as they were when the hijack was first reached. 
;; -RhpGcProbe proc - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz SynchronousRendezVous - - HijackFixupEpilog - -SynchronousRendezVous: +RhpWaitForGC proc PushProbeFrame ecx ; bitmask in ECX - WaitForGCCompletion + mov ecx, esp + call RhpWaitForGC2 mov edx, [esp + OFFSETOF__PInvokeTransitionFrame__m_Flags] ;; @@ -218,7 +177,7 @@ Abort: pop edx ;; return address as exception RIP jmp RhpThrowHwEx -RhpGcProbe endp +RhpWaitForGC endp ifdef FEATURE_GC_STRESS ;; @@ -271,51 +230,23 @@ RhpGcStressProbe endp endif ;; FEATURE_GC_STRESS -FASTCALL_FUNC RhpGcProbeHijackScalar, 0 - - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS - jmp RhpGcProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcProbeHijackObject, 0 - +FASTCALL_FUNC RhpGcProbeHijack, 0 HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcProbeHijackByref, 0 + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz WaitForGC + HijackFixupEpilog - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF - jmp RhpGcProbe +WaitForGC: + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + jmp RhpWaitForGC FASTCALL_ENDFUNC ifdef FEATURE_GC_STRESS -FASTCALL_FUNC RhpGcStressHijackScalar, 0 - - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS - jmp RhpGcStressProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcStressHijackObject, 0 - - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcStressProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcStressHijackByref, 0 +FASTCALL_FUNC RhpGcStressHijack, 0 HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcStressProbe FASTCALL_ENDFUNC @@ -356,201 +287,6 @@ FASTCALL_FUNC RhpHijackForGcStress, 0 pop ebp ret FASTCALL_ENDFUNC -endif ;; FEATURE_GC_STRESS - -;; -;; The following functions are 
_jumped_ to when we need to transfer control from one method to another for EH -;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing -;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of -;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the -;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be -;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the -;; handler in the caller. -;; -;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to -;; complete. There are also variants for GC stress. -;; -;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to -;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack -;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: what ESP should be after the return address and arg space are removed. -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack still contains the return address and the arguments to the call. -;; -;; Register state on exit: -;; ESP: what it would be after a complete return to the caller. -;; -RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName -FASTCALL_FUNC funcName, 0 - cmp [esp], hijackFuncName - je RhpGCProbeForEHJump - -IF isStress EQ 1 - cmp [esp], stressFuncName - je RhpGCStressProbeForEHJump -ENDIF - - ;; We are not hijacked, so we can return to the handler. - ;; We return to keep the call/return prediction balanced. 
- mov esp, edx ; The stack is now as if we have returned from the call. - push eax ; Push the handler as the return address. - ret - -FASTCALL_ENDFUNC -endm - - -;; We need an instance of the helper for each possible hijack function. The binder has enough -;; information to determine which one we need to use for any function. -RTU_EH_JUMP_HELPER RhpEHJumpScalar, @RhpGcProbeHijackScalar@0, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpObject, @RhpGcProbeHijackObject@0, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpByref, @RhpGcProbeHijackByref@0, 0, 0 -ifdef FEATURE_GC_STRESS -RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, @RhpGcProbeHijackScalar@0, 1, @RhpGcStressHijackScalar@0 -RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, @RhpGcProbeHijackObject@0, 1, @RhpGcStressHijackObject@0 -RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, @RhpGcProbeHijackByref@0, 1, @RhpGcStressHijackByref@0 -endif - -;; -;; Macro to setup our EBP frame and adjust the location of the EH object reference for EH jump probe funcs. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: scratch -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack is as if we have returned from the call -;; -;; Register state on exit: -;; ESP: ebp frame -;; EBP: ebp frame setup with space reserved for the repaired return address -;; EAX: reference to the exception object -;; ECX: scratch -;; -EHJumpProbeProlog macro - push eax ; save a slot for the repaired return address - push ebp ; setup an ebp frame to keep the stack nicely crawlable - mov ebp, esp - push eax ; save the handler address so we can jump to it later - mov eax, ecx ; move the ex object reference into eax so we can report it -endm - -;; -;; Macro to re-adjust the location of the EH object reference, cleanup the EBP frame, and make the -;; final jump to the handler for EH jump probe funcs. 
-;; -;; Register state on entry: -;; EAX: reference to the exception object -;; ESP: ebp frame -;; EBP: ebp frame setup with the correct return (handler) address -;; ECX: scratch -;; EDX: scratch -;; -;; Register state on exit: -;; ESP: correct for return to the caller -;; EBP: previous ebp frame -;; ECX: reference to the exception object -;; EDX: trashed -;; -EHJumpProbeEpilog macro - mov ecx, eax ; Put the EX obj ref back into ecx for the handler. - pop eax ; Recover the handler address. - pop ebp ; Pop the ebp frame we setup. - pop edx ; Pop the original return address, which we do not need. - push eax ; Push the handler as the return address. - ret -endm - -;; -;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: what ESP should be after the return address and arg space are removed. -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack is as if we have returned from the call -;; -;; Register state on exit: -;; ESP: correct for return to the caller -;; EBP: previous ebp frame -;; ECX: reference to the exception object -;; -RhpGCProbeForEHJump proc - mov esp, edx ; The stack is now as if we have returned from the call. - EHJumpProbeProlog - - ;; edx <- GetThread(), TRASHES ecx - INLINE_GETTHREAD edx, ecx - - ;; Fix the stack by pushing the original return address - mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] - mov [ebp + 4], ecx - - ClearHijackState - -ifdef _DEBUG - ;; - ;; If we get here, then we have been hijacked for a real GC, and our SyncState must - ;; reflect that we've been requested to synchronize. 
- - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @F - - call RhDebugBreak -@@: -endif ;; _DEBUG - - - PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF - WaitForGCCompletion - PopProbeFrame - - EHJumpProbeEpilog - -RhpGCProbeForEHJump endp - -ifdef FEATURE_GC_STRESS -;; -;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: what ESP should be after the return address and arg space are removed. -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack is as if we have returned from the call -;; -;; Register state on exit: -;; ESP: correct for return to the caller -;; EBP: previous ebp frame -;; ECX: reference to the exception object -;; -RhpGCStressProbeForEHJump proc - mov esp, edx ; The stack is now as if we have returned from the call. - EHJumpProbeProlog - - ;; edx <- GetThread(), TRASHES ecx - INLINE_GETTHREAD edx, ecx - - ;; Fix the stack by pushing the original return address - mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] - mov [ebp + 4], ecx - - ClearHijackState - - PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF - StressGC - PopProbeFrame - - EHJumpProbeEpilog - -RhpGCStressProbeForEHJump endp - endif ;; FEATURE_GC_STRESS end diff --git a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm index 7bd31cf93f246..90f0d083a842a 100644 --- a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm @@ -9,80 +9,4 @@ include AsmMacros.inc -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -;; -;; -;; INPUT: ECX: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
-_RhpWaitForGCNoAbort proc public - push ebp - mov ebp, esp - push eax - push edx - push ebx - push esi - - mov esi, [ecx + OFFSETOF__PInvokeTransitionFrame__m_pThread] - - test dword ptr [esi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc - jnz Done - - ; passing transition frame pointer in ecx - call RhpWaitForGC2 - -Done: - pop esi - pop ebx - pop edx - pop eax - pop ebp - ret -_RhpWaitForGCNoAbort endp - -RhpThrowHwEx equ @RhpThrowHwEx@0 -EXTERN RhpThrowHwEx : PROC - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -;; -;; -;; INPUT: ECX: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -_RhpWaitForGC proc public - push ebp - mov ebp, esp - push ebx - - mov ebx, ecx - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jz NoWait - - call _RhpWaitForGCNoAbort -NoWait: - test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress - jz Done - test dword ptr [ebx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT - jz Done - - mov ecx, STATUS_REDHAWK_THREAD_ABORT - pop ebx - pop ebp - pop edx ; return address as exception RIP - jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception -Done: - pop ebx - pop ebp - ret -_RhpWaitForGC endp - - end diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index 4d4db0343be5f..704636d401c76 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -406,15 +406,7 @@ void * ReturnFromCallDescrThunk; // Return address hijacking // #if !defined (HOST_ARM64) -COOP_PINVOKE_HELPER(void, RhpGcStressHijackScalar, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -COOP_PINVOKE_HELPER(void, RhpGcStressHijackObject, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -COOP_PINVOKE_HELPER(void, RhpGcStressHijackByref, ()) 
+COOP_PINVOKE_HELPER(void, RhpGcStressHijack, ()) { ASSERT_UNCONDITIONALLY("NYI"); } diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index cecee8f9e6a82..9256517277b2c 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -544,63 +544,17 @@ void Thread::GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, St #ifndef DACCESS_COMPILE EXTERN_C void FASTCALL RhpSuspendRedirected(); - -#if defined(TARGET_ARM64) || defined(TARGET_UNIX) EXTERN_C void FASTCALL RhpGcProbeHijack(); - -static void* NormalHijackTargets[1] = -{ - reinterpret_cast<void*>(RhpGcProbeHijack) -}; -#else // TARGET_ARM64 || TARGET_UNIX -EXTERN_C void FASTCALL RhpGcProbeHijackScalar(); -EXTERN_C void FASTCALL RhpGcProbeHijackObject(); -EXTERN_C void FASTCALL RhpGcProbeHijackByref(); - -static void* NormalHijackTargets[3] = -{ - reinterpret_cast<void*>(RhpGcProbeHijackScalar), // GCRK_Scalar = 0, - reinterpret_cast<void*>(RhpGcProbeHijackObject), // GCRK_Object = 1, - reinterpret_cast<void*>(RhpGcProbeHijackByref) // GCRK_Byref = 2, -}; -#endif // TARGET_ARM64 || TARGET_UNIX - -#ifdef FEATURE_GC_STRESS -#ifndef TARGET_ARM64 -EXTERN_C void FASTCALL RhpGcStressHijackScalar(); -EXTERN_C void FASTCALL RhpGcStressHijackObject(); -EXTERN_C void FASTCALL RhpGcStressHijackByref(); - -static void* GcStressHijackTargets[3] = -{ - reinterpret_cast<void*>(RhpGcStressHijackScalar), // GCRK_Scalar = 0, - reinterpret_cast<void*>(RhpGcStressHijackObject), // GCRK_Object = 1, - reinterpret_cast<void*>(RhpGcStressHijackByref) // GCRK_Byref = 2, -}; -#else // TARGET_ARM64 EXTERN_C void FASTCALL RhpGcStressHijack(); -static void* GcStressHijackTargets[1] = -{ - reinterpret_cast<void*>(RhpGcStressHijack) -}; -#endif // TARGET_ARM64 -#endif // FEATURE_GC_STRESS - // static -bool Thread::IsHijackTarget(void * address) +bool Thread::IsHijackTarget(void* address) { - for (size_t i = 0; i < ARRAY_SIZE(NormalHijackTargets); i++) - { - if (NormalHijackTargets[i] == address) - 
return true; - } + if (&RhpGcProbeHijack == address) + return true; #ifdef FEATURE_GC_STRESS - for (size_t i = 0; i < ARRAY_SIZE(GcStressHijackTargets); i++) - { - if (GcStressHijackTargets[i] == address) - return true; - } + if (&RhpGcStressHijack == address) + return true; #endif // FEATURE_GC_STRESS return false; } @@ -617,7 +571,7 @@ void Thread::Hijack() } #if defined(TARGET_ARM64) && defined(TARGET_UNIX) - // TODO: RhpGcProbe and related asm helpers NYI for ARM64/UNIX. + // TODO: RhpGcProbeHijack and related asm helpers NYI for ARM64/UNIX. // disabling hijacking for now. return; #endif @@ -693,7 +647,7 @@ void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijac #endif //FEATURE_SUSPEND_REDIRECTION } - pThread->HijackReturnAddress(pThreadContext, NormalHijackTargets); + pThread->HijackReturnAddress(pThreadContext, &RhpGcProbeHijack); } #ifdef FEATURE_GC_STRESS @@ -734,7 +688,7 @@ void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) } if (bForceGC || pInstance->ShouldHijackCallsiteForGcStress(ip)) { - pCurrentThread->HijackReturnAddress(pSuspendCtx, GcStressHijackTargets); + pCurrentThread->HijackReturnAddress(pSuspendCtx, &RhpGcStressHijack); } } #endif // FEATURE_GC_STRESS @@ -742,7 +696,7 @@ void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) // This function is called from a thread to place a return hijack onto its own stack for GC stress cases // via Thread::HijackForGcStress above. The only constraint on the suspension is that the // stack be crawlable enough to yield the location of the return address. 
-void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijackTargets[]) +void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction) { if (IsDoNotTriggerGcSet()) return; @@ -753,7 +707,7 @@ void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijac return; } - HijackReturnAddressWorker(&frameIterator, pvHijackTargets); + HijackReturnAddressWorker(&frameIterator, pfnHijackFunction); } // This function is called in one of two scenarios: @@ -761,19 +715,19 @@ void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijac // thread is OS suspended at pSuspendCtx in managed code. // 2) from a thread to place a return hijack onto its own stack for GC suspension. In this case the target // thread is interrupted at pSuspendCtx in managed code via a signal or similar. -void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, void * pvHijackTargets[]) +void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction) { ASSERT(!IsDoNotTriggerGcSet()); StackFrameIterator frameIterator(this, pSuspendCtx); ASSERT(frameIterator.IsValid()); - HijackReturnAddressWorker(&frameIterator, pvHijackTargets); + HijackReturnAddressWorker(&frameIterator, pfnHijackFunction); } -void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* pvHijackTargets[]) +void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction) { - PTR_PTR_VOID ppvRetAddrLocation; + void** ppvRetAddrLocation; GCRefKind retValueKind; frameIterator->CalculateCurrentMethodState(); @@ -784,14 +738,11 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* { ASSERT(ppvRetAddrLocation != NULL); - // check if hijack location is the same + // if the new hijack location is the same, we do nothing if (m_ppvHijackedReturnAddressLocation == ppvRetAddrLocation) return; - // ARM64 epilogs have a window between 
loading the hijackable return address into LR and the RET instruction. - // We cannot hijack or unhijack a thread while it is suspended in that window unless we implement hijacking - // via LR register modification. Therefore it is important to check our ability to hijack the thread before - // unhijacking it. + // we only unhijack if we are going to install a new or better hijack. CrossThreadUnhijack(); void* pvRetAddr = *ppvRetAddrLocation; @@ -800,14 +751,8 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; m_pvHijackedReturnAddress = pvRetAddr; -#if defined(TARGET_ARM64) || defined(TARGET_UNIX) m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); - *ppvRetAddrLocation = pvHijackTargets[0]; -#else - void* pvHijackTarget = pvHijackTargets[retValueKind]; - ASSERT_MSG(IsHijackTarget(pvHijackTarget), "unexpected method used as hijack target"); - *ppvRetAddrLocation = pvHijackTarget; -#endif + *ppvRetAddrLocation = (void*)pfnHijackFunction; STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p\n", GetPalThreadIdForLogging(), frameIterator->GetRegisterSet()->GetIP()); @@ -1396,7 +1341,7 @@ COOP_PINVOKE_HELPER(uint64_t, RhCurrentOSThreadId, ()) } // Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread -EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame * pFrame) +EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame* pFrame) { ASSERT(pFrame->m_savedThread == ThreadStore::RawGetCurrentThread()); pFrame->m_savedThread->ReversePInvokeAttachOrTrapThread(pFrame); diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 95cc8e5521c99..a09ddd0ae02c0 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -70,21 +70,14 @@ struct 
ThreadBuffer { uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; uint32_t volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum -#if DACCESS_COMPILE - volatile PInvokeTransitionFrame* m_pTransitionFrame; -#else - PInvokeTransitionFrame* m_pTransitionFrame; -#endif PInvokeTransitionFrame* m_pDeferredTransitionFrame; // see Thread::EnablePreemptiveMode PInvokeTransitionFrame* m_pCachedTransitionFrame; PTR_Thread m_pNext; // used by ThreadStore's SList HANDLE m_hPalThread; // WARNING: this may legitimately be INVALID_HANDLE_VALUE void ** m_ppvHijackedReturnAddressLocation; void * m_pvHijackedReturnAddress; -#ifdef HOST_64BIT - uintptr_t m_uHijackedReturnValueFlags; // used on ARM64 and UNIX only; however, ARM64 and AMD64 share field offsets -#endif // HOST_64BIT + uintptr_t m_uHijackedReturnValueFlags; PTR_ExInfo m_pExInfoStackHead; Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort PTR_PTR_VOID m_pThreadLocalModuleStatics; @@ -142,10 +135,18 @@ class Thread : private ThreadBuffer void ClearState(ThreadStateFlags flags); bool IsStateSet(ThreadStateFlags flags); + static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack); - void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void * pvHijackTargets[]); - void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, void* pvHijackTargets[]); - void HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* pvHijackTargets[]); + + // + // Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller. + // Thus they cannot have any parameters or return anything. 
+ // + typedef void HijackFunc(); + + void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); + void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); + void HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction); bool InlineSuspend(NATIVE_CONTEXT* interruptedContext); #ifdef FEATURE_SUSPEND_REDIRECTION diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 260a2ca533dc4..1aaf7c53ff615 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -320,7 +320,7 @@ C_FUNC(\Name): DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP .macro PUSH_COOP_PINVOKE_FRAME trashReg - push_nonvol_reg rbp // push RBP frame // TODO: do we need this? not on windows. + push_nonvol_reg rbp // push RBP frame mov rbp, rsp lea \trashReg, [rsp + 0x10] push_register \trashReg // save caller's RSP