From dd9382ee9a64c35d83d8318753eda8fb12979f41 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 18 Jul 2022 14:57:00 -0700 Subject: [PATCH 01/17] remove RhpReversePInvokeAttachOrTrapThread (dead code) --- .../nativeaot/Runtime/amd64/AsmMacros.inc | 1 - .../nativeaot/Runtime/amd64/PInvoke.asm | 49 ----------------- src/coreclr/nativeaot/Runtime/arm/AsmMacros.h | 1 - src/coreclr/nativeaot/Runtime/arm/PInvoke.asm | 27 ---------- .../nativeaot/Runtime/arm64/AsmMacros.h | 1 - src/coreclr/nativeaot/Runtime/arm64/PInvoke.S | 52 ------------------ .../nativeaot/Runtime/arm64/PInvoke.asm | 53 ------------------- .../nativeaot/Runtime/i386/AsmMacros.inc | 2 - src/coreclr/nativeaot/Runtime/thread.cpp | 8 +-- 9 files changed, 1 insertion(+), 193 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index cb1f9830eb89a..d44537b2456c1 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -396,7 +396,6 @@ EXTERN RhpGcAlloc : PROC EXTERN RhpValidateExInfoPop : PROC EXTERN RhDebugBreak : PROC EXTERN RhpWaitForGC2 : PROC -EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC EXTERN RhExceptionHandling_FailedAllocation : PROC EXTERN RhThrowHwEx : PROC EXTERN RhThrowEx : PROC diff --git a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm index ffa74efc257d3..10e51b74d85d0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm @@ -74,55 +74,6 @@ Done: NESTED_END RhpWaitForGC, _TEXT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpReversePInvokeAttachOrTrapThread -;; -;; -;; INCOMING: RAX -- address of reverse pinvoke frame -;; -;; PRESERVES: RCX, RDX, R8, R9 -- need to preserve these because the caller assumes they aren't trashed 
-;; -;; TRASHES: RAX, R10, R11 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT - alloc_stack 88h ; alloc scratch area and frame - - ; save the integer arg regs - save_reg_postrsp rcx, (20h + 0*8) - save_reg_postrsp rdx, (20h + 1*8) - save_reg_postrsp r8, (20h + 2*8) - save_reg_postrsp r9, (20h + 3*8) - - ; save the FP arg regs - save_xmm128_postrsp xmm0, (20h + 4*8 + 0*10h) - save_xmm128_postrsp xmm1, (20h + 4*8 + 1*10h) - save_xmm128_postrsp xmm2, (20h + 4*8 + 2*10h) - save_xmm128_postrsp xmm3, (20h + 4*8 + 3*10h) - - END_PROLOGUE - - mov rcx, rax ; rcx <- reverse pinvoke frame - call RhpReversePInvokeAttachOrTrapThread2 - - movdqa xmm0, [rsp + (20h + 4*8 + 0*10h)] - movdqa xmm1, [rsp + (20h + 4*8 + 1*10h)] - movdqa xmm2, [rsp + (20h + 4*8 + 2*10h)] - movdqa xmm3, [rsp + (20h + 4*8 + 3*10h)] - - mov rcx, [rsp + (20h + 0*8)] - mov rdx, [rsp + (20h + 1*8)] - mov r8, [rsp + (20h + 2*8)] - mov r9, [rsp + (20h + 3*8)] - - ;; epilog - add rsp, 88h - ret - -NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke diff --git a/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h index 64f3a530acfb4..96e1be63315f7 100644 --- a/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm/AsmMacros.h @@ -263,7 +263,6 @@ Name SETS "|$FuncName|" EXTERN RhpGcAlloc EXTERN RhDebugBreak EXTERN RhpWaitForGC2 - EXTERN RhpReversePInvokeAttachOrTrapThread2 EXTERN RhExceptionHandling_FailedAllocation diff --git a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm index e1cbe46c77813..2d63932c75fda 100644 --- a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm @@ -69,30 +69,3 @@ 
NoAbort NESTED_END RhpWaitForGC INLINE_GETTHREAD_CONSTANT_POOL - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke -;; -;; -;; INPUT: r4: address of reverse pinvoke frame -;; -;; TRASHES: none -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread - - PROLOG_PUSH {r0-r4,lr} ; Need to save argument registers r0-r3 and lr, r4 is just for alignment - PROLOG_VPUSH {d0-d7} ; Save float argument registers as well since they're volatile - - mov r0, r4 ; passing reverse pinvoke frame pointer in r0 - bl RhpReversePInvokeAttachOrTrapThread2 - - EPILOG_VPOP {d0-d7} - EPILOG_POP {r0-r4,pc} - - NESTED_END RhpReversePInvokeTrapThread - - - end diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 0cea6597d1047..d092f638be292 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -97,7 +97,6 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbA EXTERN RhExceptionHandling_FailedAllocation EXTERN RhDebugBreak EXTERN RhpWaitForGC2 - EXTERN RhpReversePInvokeAttachOrTrapThread2 EXTERN RhThrowHwEx EXTERN RhThrowEx EXTERN RhRethrow diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S index 3a24634ff0383..f4becffaed691 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S @@ -88,58 +88,6 @@ NoAbort: NESTED_END RhpWaitForGC, _TEXT -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke -// -// -// INPUT: x9: address of reverse pinvoke frame -// -// PRESERVES: 
x0-x8 -- need to preserve these because the caller assumes they are not trashed -// -// TRASHES: none -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT, NoHandler - - // FP and LR registers - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0xA0 // Push down stack pointer and store FP and LR - - // Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) - stp x0, x1, [sp, #0x10] - stp x2, x3, [sp, #0x20] - stp x4, x5, [sp, #0x30] - stp x6, x7, [sp, #0x40] - stp x8, x8, [sp, #0x50] - - // Save float argument registers as well since they are volatile - stp d0, d1, [sp, #0x60] - stp d2, d3, [sp, #0x70] - stp d4, d5, [sp, #0x80] - stp d6, d7, [sp, #0x90] - - mov x0, x9 // passing reverse pinvoke frame pointer in x0 - bl RhpReversePInvokeAttachOrTrapThread2 - - // Restore floating point registers - ldp d0, d1, [sp, #0x60] - ldp d2, d3, [sp, #0x70] - ldp d4, d5, [sp, #0x80] - ldp d6, d7, [sp, #0x90] - - // Restore the argument registers - ldp x0, x1, [sp, #0x10] - ldp x2, x3, [sp, #0x20] - ldp x4, x5, [sp, #0x30] - ldp x6, x7, [sp, #0x40] - ldr x8, [sp, #0x50] - - // Restore FP and LR registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0xA0 - EPILOG_RETURN - - NESTED_END RhpReversePInvokeAttachOrTrapThread, _TEXT - // // RhpPInvoke // diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm index 540df990c853c..567ce2dd8f935 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm @@ -79,59 +79,6 @@ NoAbort INLINE_GETTHREAD_CONSTANT_POOL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke -;; -;; -;; INPUT: x9: address of reverse pinvoke frame 
-;; -;; PRESERVES: x0-x8 -- need to preserve these because the caller assumes they aren't trashed -;; -;; TRASHES: none -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread - - ;; FP and LR registers - PROLOG_SAVE_REG_PAIR fp, lr, #-0xA0! ;; Push down stack pointer and store FP and LR - - ;; Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) - stp x0, x1, [sp, #0x10] - stp x2, x3, [sp, #0x20] - stp x4, x5, [sp, #0x30] - stp x6, x7, [sp, #0x40] - stp x8, x8, [sp, #0x50] - - ;; Save float argument registers as well since they're volatile - stp d0, d1, [sp, #0x60] - stp d2, d3, [sp, #0x70] - stp d4, d5, [sp, #0x80] - stp d6, d7, [sp, #0x90] - - mov x0, x9 ; passing reverse pinvoke frame pointer in x0 - bl RhpReversePInvokeAttachOrTrapThread2 - - ;; Restore floating point registers - ldp d0, d1, [sp, #0x60] - ldp d2, d3, [sp, #0x70] - ldp d4, d5, [sp, #0x80] - ldp d6, d7, [sp, #0x90] - - ;; Restore the argument registers - ldp x0, x1, [sp, #0x10] - ldp x2, x3, [sp, #0x20] - ldp x4, x5, [sp, #0x30] - ldp x6, x7, [sp, #0x40] - ldr x8, [sp, #0x50] - - ;; Restore FP and LR registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR fp, lr, #0xA0! 
- EPILOG_RETURN - - NESTED_END RhpReversePInvokeAttachOrTrapThread - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc index 1bb6d2d34bba9..495a318c7bf92 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc @@ -180,7 +180,6 @@ G_EPHEMERAL_LOW equ _g_ephemeral_low G_EPHEMERAL_HIGH equ _g_ephemeral_high G_CARD_TABLE equ _g_card_table RhpWaitForGC2 equ @RhpWaitForGC2@4 -RhpReversePInvokeAttachOrTrapThread2 equ @RhpReversePInvokeAttachOrTrapThread2@4 RhpTrapThreads equ _RhpTrapThreads ifdef FEATURE_GC_STRESS @@ -194,7 +193,6 @@ endif ;; FEATURE_GC_STRESS EXTERN RhpGcAlloc : PROC EXTERN RhDebugBreak : PROC EXTERN RhpWaitForGC2 : PROC -EXTERN RhpReversePInvokeAttachOrTrapThread2 : PROC EXTERN RhExceptionHandling_FailedAllocation : PROC EXTERN RhThrowHwEx : PROC EXTERN RhThrowEx : PROC diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index cecee8f9e6a82..d3bc792639c30 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -1395,12 +1395,6 @@ COOP_PINVOKE_HELPER(uint64_t, RhCurrentOSThreadId, ()) return PalGetCurrentThreadIdForLogging(); } -// Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread -EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame * pFrame) -{ - ASSERT(pFrame->m_savedThread == ThreadStore::RawGetCurrentThread()); - pFrame->m_savedThread->ReversePInvokeAttachOrTrapThread(pFrame); -} // // PInvoke @@ -1413,7 +1407,7 @@ COOP_PINVOKE_HELPER(void, RhpReversePInvoke, (ReversePInvokeFrame * pFrame)) if (pCurThread->InlineTryFastReversePInvoke(pFrame)) return; - RhpReversePInvokeAttachOrTrapThread2(pFrame); + 
pCurThread->ReversePInvokeAttachOrTrapThread(pFrame); } COOP_PINVOKE_HELPER(void, RhpReversePInvokeReturn, (ReversePInvokeFrame * pFrame)) From c60f01f894de199f6d3bdb347ad9fe2e5254cda4 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 18 Jul 2022 15:21:22 -0700 Subject: [PATCH 02/17] remove RhpWaitForGC --- .../nativeaot/Runtime/amd64/PInvoke.asm | 41 +------------------ src/coreclr/nativeaot/Runtime/arm/PInvoke.asm | 33 --------------- src/coreclr/nativeaot/Runtime/arm64/PInvoke.S | 34 --------------- .../nativeaot/Runtime/arm64/PInvoke.asm | 36 ---------------- .../nativeaot/Runtime/i386/PInvoke.asm | 41 ------------------- 5 files changed, 1 insertion(+), 184 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm index 10e51b74d85d0..95a578ebfe25d 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm @@ -35,45 +35,6 @@ Done: NESTED_END RhpWaitForGCNoAbort, _TEXT -EXTERN RhpThrowHwEx : PROC - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -- rare path for RhpPInvokeReturn -;; -;; -;; INPUT: RCX: transition frame -;; -;; TRASHES: RCX, RDX, R8, R9, R10, R11 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -NESTED_ENTRY RhpWaitForGC, _TEXT - push_nonvol_reg rbx - END_PROLOGUE - - mov rbx, rcx - - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jz NoWait - - call RhpWaitForGCNoAbort -NoWait: - test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress - jz Done - test dword ptr [rbx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT - jz Done - - mov rcx, STATUS_REDHAWK_THREAD_ABORT - pop rbx - pop rdx ; return address as exception RIP - jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception - 
-Done: - pop rbx - ret - -NESTED_END RhpWaitForGC, _TEXT - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke @@ -122,7 +83,7 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT ret @@: ; passing transition frame pointer in rcx - jmp RhpWaitForGC + jmp RhpWaitForGC2 LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm index 2d63932c75fda..26c2055fd79c2 100644 --- a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm @@ -35,37 +35,4 @@ Done NESTED_END RhpWaitForGCNoAbort - EXTERN RhpThrowHwEx - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -;; -;; -;; INPUT: r2: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGC - PROLOG_PUSH {r0,lr} - - ldr r0, =RhpTrapThreads - ldr r0, [r0] - tst r0, #TrapThreadsFlags_TrapThreads - beq NoWait - bl RhpWaitForGCNoAbort -NoWait - tst r0, #TrapThreadsFlags_AbortInProgress - beq NoAbort - ldr r0, [r2, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - tst r0, #PTFF_THREAD_ABORT - beq NoAbort - EPILOG_POP {r0,r1} ; hijack target address as exception PC - EPILOG_NOP mov r0, #STATUS_REDHAWK_THREAD_ABORT - EPILOG_BRANCH RhpThrowHwEx -NoAbort - EPILOG_POP {r0,pc} - NESTED_END RhpWaitForGC - INLINE_GETTHREAD_CONSTANT_POOL diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S index f4becffaed691..3c8ce2eafedea 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S @@ -54,40 +54,6 @@ Done: NESTED_END RhpWaitForGCNoAbort -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// 
RhpWaitForGC -// -// -// INPUT: x9: transition frame -// -// TRASHES: x0, x1, x10 -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler - - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x10 - - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 10 - - tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait - bl RhpWaitForGCNoAbort -NoWait: - tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort - ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort - - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x10 - mov w0, #STATUS_REDHAWK_THREAD_ABORT - mov x1, lr // hijack target address as exception PC - b RhpThrowHwEx - -NoAbort: - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x10 - EPILOG_RETURN - - NESTED_END RhpWaitForGC, _TEXT - // // RhpPInvoke // diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm index 567ce2dd8f935..27d40b911c50f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm @@ -41,42 +41,6 @@ Done NESTED_END RhpWaitForGCNoAbort - EXTERN RhpThrowHwEx - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -;; -;; -;; INPUT: x9: transition frame -;; -;; TRASHES: x0, x1, x10 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGC - - PROLOG_SAVE_REG_PAIR fp, lr, #-0x10! - - ldr x10, =RhpTrapThreads - ldr w10, [x10] - tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait - bl RhpWaitForGCNoAbort -NoWait - tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort - ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort - - EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! 
- EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT - EPILOG_NOP mov x1, lr ; hijack target address as exception PC - EPILOG_NOP b RhpThrowHwEx - -NoAbort - EPILOG_RESTORE_REG_PAIR fp, lr, #0x10! - EPILOG_RETURN - - NESTED_END RhpWaitForGC - INLINE_GETTHREAD_CONSTANT_POOL ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm index 7bd31cf93f246..cf476de49309c 100644 --- a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm @@ -44,45 +44,4 @@ Done: ret _RhpWaitForGCNoAbort endp -RhpThrowHwEx equ @RhpThrowHwEx@0 -EXTERN RhpThrowHwEx : PROC - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGC -;; -;; -;; INPUT: ECX: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -_RhpWaitForGC proc public - push ebp - mov ebp, esp - push ebx - - mov ebx, ecx - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jz NoWait - - call _RhpWaitForGCNoAbort -NoWait: - test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress - jz Done - test dword ptr [ebx + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT - jz Done - - mov ecx, STATUS_REDHAWK_THREAD_ABORT - pop ebx - pop ebp - pop edx ; return address as exception RIP - jmp RhpThrowHwEx ; Throw the ThreadAbortException as a special kind of hardware exception -Done: - pop ebx - pop ebp - ret -_RhpWaitForGC endp - - end From 45dfbfcf2edc18995d8bdb18a364ff5d14fec16d Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 19 Jul 2022 00:33:44 -0700 Subject: [PATCH 03/17] removing some dead code --- src/coreclr/nativeaot/Runtime/amd64/GcProbe.S | 75 +----- .../nativeaot/Runtime/amd64/GcProbe.asm | 225 
+--------------- .../nativeaot/Runtime/amd64/PInvoke.asm | 32 --- src/coreclr/nativeaot/Runtime/arm/GcProbe.asm | 230 +--------------- src/coreclr/nativeaot/Runtime/arm/PInvoke.asm | 30 --- .../nativeaot/Runtime/arm64/GcProbe.asm | 56 +--- src/coreclr/nativeaot/Runtime/arm64/PInvoke.S | 36 --- .../nativeaot/Runtime/arm64/PInvoke.asm | 38 --- .../nativeaot/Runtime/i386/GcProbe.asm | 246 +----------------- .../nativeaot/Runtime/i386/PInvoke.asm | 35 --- 10 files changed, 43 insertions(+), 960 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index 810c7e35b90cd..ee1dab36391d2 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -67,24 +67,6 @@ pop rdx .endm -// -// Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -// thread if it finds it at an IP that isn`t managed code. -// -// Register state on entry: -// R11: thread pointer -// -// Register state on exit: -// R9: trashed -// -.macro ClearHijackState - xor r9, r9 - mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 - mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 - mov [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 -.endm - - // // The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and // clears the hijack state. 
@@ -117,53 +99,13 @@ mov rcx, [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags] - ClearHijackState -.endm - -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpWaitForGCNoAbort -- rare path for WaitForGCCompletion -// -// -// INPUT: RDI: transition frame -// -// TRASHES: RCX, RDI, R8, R9, R10, R11 -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler - END_PROLOGUE - - mov rdx, [rdi + OFFSETOF__PInvokeTransitionFrame__m_pThread] - - test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc - jnz Done - - // passing transition frame pointer in rdi - call C_FUNC(RhpWaitForGC2) - -Done: - ret - -NESTED_END RhpWaitForGCNoAbort, _TEXT - -// -// Set the Thread state and wait for a GC to complete. -// -// Register state on entry: -// RBX: thread pointer -// -// Register state on exit: -// RBX: thread pointer -// All other registers trashed -// - -.macro WaitForGCCompletion - test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz LOCAL_LABEL(NoWait) - - mov rdi, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - call C_FUNC(RhpWaitForGCNoAbort) -LOCAL_LABEL(NoWait): + // + // Clear hijack state + // + xor r9, r9 + mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 + mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 + mov [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 .endm @@ -190,7 +132,8 @@ LOCAL_LABEL(RhpGcProbe_Trap): END_PROLOGUE mov rbx, r11 - WaitForGCCompletion + mov rdi, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + call C_FUNC(RhpWaitForGC2) mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm 
b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index 2a83258629bd5..f114c17655326 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -73,23 +73,6 @@ POP_PROBE_FRAME macro extraStack pop rax endm -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. -;; -;; Register state on entry: -;; RDX: thread pointer -;; -;; Register state on exit: -;; RCX: trashed -;; -ClearHijackState macro - xor ecx, ecx - mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx - mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx -endm - - ;; ;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and ;; clears the hijack state. @@ -112,33 +95,15 @@ FixupHijackedCallstack macro mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx - ClearHijackState -endm - -;; -;; Set the Thread state and wait for a GC to complete. 
-;; -;; Register state on entry: -;; RBX: thread pointer -;; -;; Register state on exit: -;; RBX: thread pointer -;; All other registers trashed -;; - -EXTERN RhpWaitForGCNoAbort : PROC - -WaitForGCCompletion macro - test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz @F - - mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - call RhpWaitForGCNoAbort -@@: + ;; + ;; Clear hijack state + ;; + xor ecx, ecx + mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx + mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx endm - EXTERN RhpPInvokeExceptionGuard : PROC ;; @@ -228,7 +193,8 @@ NESTED_ENTRY RhpGcProbe, _TEXT END_PROLOGUE mov rbx, rdx - WaitForGCCompletion + mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + call RhpWaitForGC2 mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT @@ -342,182 +308,7 @@ endif ;; FEATURE_GC_STRESS ;; -;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH -;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing -;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of -;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the -;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be -;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the -;; handler in the caller. -;; -;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to -;; complete. There are also variants for GC stress. 
-;; -;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to -;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack -;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. -;; -;; Register state on entry: -;; RAX: pointer to this function (i.e., trash) -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack still contains the return address. -;; -;; Register state on exit: -;; RSP: what it would be after a complete return to the caler. -;; RDX: TRASHED -;; -RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName -LEAF_ENTRY funcName, _TEXT - lea rax, [hijackFuncName] - cmp [rsp], rax - je RhpGCProbeForEHJump - -IF isStress EQ 1 - lea rax, [stressFuncName] - cmp [rsp], rax - je RhpGCStressProbeForEHJump -ENDIF - - ;; We are not hijacked, so we can return to the handler. - ;; We return to keep the call/return prediction balanced. - mov [rsp], rdx ; Update the return address - ret - -LEAF_END funcName, _TEXT -endm - -;; We need an instance of the helper for each possible hijack function. The binder has enough -;; information to determine which one we need to use for any function. 
-RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, 0, 0 -ifdef FEATURE_GC_STRESS -RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, 1, RhpGcStressHijackScalar -RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, 1, RhpGcStressHijackObject -RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, 1, RhpGcStressHijackByref -endif - -;; -;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. -;; -;; Register state on entry: -;; RAX: scratch -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack is as if we are just about to returned from the call -;; -;; Register state on exit: -;; RAX: reference to the exception object -;; RCX: scratch -;; RDX: thread pointer -;; -EHJumpProbeProlog_extraStack = 1*8 -EHJumpProbeProlog macro - push_nonvol_reg rdx ; save the handler address so we can jump to it later - mov rax, rcx ; move the ex object reference into rax so we can report it - - ;; rdx <- GetThread(), TRASHES rcx - INLINE_GETTHREAD rdx, rcx - - ;; Fix the stack by patching the original return address - mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] - mov [rsp + EHJumpProbeProlog_extraStack], rcx - - ClearHijackState - - ; TRASHES r10 - PUSH_PROBE_FRAME rdx, r10, EHJumpProbeProlog_extraStack, PROBE_SAVE_FLAGS_RAX_IS_GCREF - - END_PROLOGUE -endm - -;; -;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the -;; final jump to the handler for EH jump probe funcs. 
-;; -;; Register state on entry: -;; RAX: reference to the exception object -;; RCX: scratch -;; RDX: scratch -;; -;; Register state on exit: -;; RSP: correct for return to the caller -;; RCX: reference to the exception object -;; RDX: trashed -;; -EHJumpProbeEpilog macro - POP_PROBE_FRAME EHJumpProbeProlog_extraStack - mov rcx, rax ; Put the EX obj ref back into rcx for the handler. - - pop rax ; Recover the handler address. - mov [rsp], rax ; Update the return address - ret -endm - -;; -;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. -;; -;; Register state on entry: -;; RAX: scratch -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (rsp points to return address). -;; -;; Register state on exit: -;; RSP: correct for return to the caller -;; RBP: previous ebp frame -;; RCX: reference to the exception object -;; -NESTED_ENTRY RhpGCProbeForEHJump, _TEXT - EHJumpProbeProlog - -ifdef _DEBUG - ;; - ;; If we get here, then we have been hijacked for a real GC, and our SyncState must - ;; reflect that we've been requested to synchronize. - - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @F - - call RhDebugBreak -@@: -endif ;; _DEBUG - - mov rbx, rdx - WaitForGCCompletion - - EHJumpProbeEpilog - -NESTED_END RhpGCProbeForEHJump, _TEXT - ifdef FEATURE_GC_STRESS -;; -;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. -;; -;; Register state on entry: -;; RAX: scratch -;; RCX: reference to the exception object. -;; RDX: handler address we want to jump to. -;; RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (rsp points to return address). 
-;; -;; Register state on exit: -;; RSP: correct for return to the caller -;; RBP: previous ebp frame -;; RCX: reference to the exception object -;; -NESTED_ENTRY RhpGCStressProbeForEHJump, _TEXT - EHJumpProbeProlog - - call REDHAWKGCINTERFACE__STRESSGC - - EHJumpProbeEpilog - -NESTED_END RhpGCStressProbeForEHJump, _TEXT g_pTheRuntimeInstance equ ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA EXTERN g_pTheRuntimeInstance : QWORD diff --git a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm index 95a578ebfe25d..0b08d84484995 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm @@ -3,38 +3,6 @@ include asmmacros.inc -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -- rare path for WaitForGCCompletion -;; -;; -;; INPUT: RCX: transition frame -;; -;; TRASHES: RCX, RDX, R8, R9, R10, R11 -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT - push_vol_reg rax ; don't trash the integer return value - alloc_stack 30h - movdqa [rsp + 20h], xmm0 ; don't trash the FP return value - END_PROLOGUE - - mov rdx, [rcx + OFFSETOF__PInvokeTransitionFrame__m_pThread] - - test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc - jnz Done - - ; passing transition frame pointer in rcx - call RhpWaitForGC2 - -Done: - movdqa xmm0, [rsp + 20h] - add rsp, 30h - pop rax - ret - -NESTED_END RhpWaitForGCNoAbort, _TEXT - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm index 3e3f2c608e364..3f69f449a18da 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm +++ 
b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm @@ -135,25 +135,6 @@ __PPF_ThreadReg SETS "r2" MEND -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. -;; -;; Register state on entry: -;; r2: thread pointer -;; -;; Register state on exit: -;; r12: trashed -;; - MACRO - ClearHijackState - - mov r12, #0 - str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] - str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - MEND - - ;; ;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and ;; clears the hijack state. @@ -177,32 +158,11 @@ __PPF_ThreadReg SETS "r2" ;; ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - ClearHijackState - MEND - -;; -;; Set the Thread state and wait for a GC to complete. -;; -;; Register state on entry: -;; r4: thread pointer -;; -;; Register state on exit: -;; r4: thread pointer -;; All other registers trashed -;; - - EXTERN RhpWaitForGCNoAbort - - MACRO - WaitForGCCompletion - - ldr r2, [r4, #OFFSETOF__Thread__m_ThreadStateFlags] - tst r2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC - bne %ft0 + ;; Clear hijack state + mov r12, #0 + str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - ldr r2, [r4, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - bl RhpWaitForGCNoAbort -0 MEND @@ -326,8 +286,8 @@ __PPF_ThreadReg SETS "r2" NESTED_ENTRY RhpGcProbeRare PROLOG_PROBE_FRAME r2, r3, r12 - mov r4, r2 - WaitForGCCompletion + ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] tst r2, #PTFF_THREAD_ABORT @@ -357,8 +317,8 @@ __PPF_ThreadReg SETS "r2" ; Unhijack this thread, if necessary. 
INLINE_THREAD_UNHIJACK r2, r0, r1 ;; trashes r0, r1 - mov r4, r2 - WaitForGCCompletion + ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 EPILOG_PROBE_FRAME NESTED_END RhpGcPollRare @@ -410,181 +370,7 @@ DREG_SZ equ (SIZEOF__PAL_LIMITED_CONTEXT - (OFFSETOF__PAL_LIMITED_CONTEXT__L NESTED_END RhpHijackForGcStress #endif ;; FEATURE_GC_STRESS - -;; -;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH -;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing -;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of -;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the -;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be -;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the -;; handler in the caller. -;; -;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to -;; complete. There are also variants for GC stress. -;; -;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to -;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack -;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. -;; -;; Register state on entry: -;; r0: pointer to this function (i.e., trash) -;; r1: reference to the exception object. -;; r2: handler address we want to jump to. -;; Non-volatile registers are all already correct for return to the caller. -;; LR still contains the return address. 
-;; -;; Register state on exit: -;; All registers except r0 and lr unchanged -;; - MACRO - RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName - - LEAF_ENTRY $funcName - ; Currently the EH epilog won't pop the return address back into LR, - ; so we have to have a funny load from [sp-4] here to retrieve it. - - ldr r0, =$hijackFuncName - cmp r0, lr - beq RhpGCProbeForEHJump - - IF $isStress - ldr r0, =$stressFuncName - cmp r0, lr - beq RhpGCStressProbeForEHJump - ENDIF - - ;; We are not hijacked, so we can return to the handler. - ;; We return to keep the call/return prediction balanced. - mov lr, r2 ; Update the return address - bx lr - LEAF_END $funcName - MEND - -;; We need an instance of the helper for each possible hijack function. The binder has enough -;; information to determine which one we need to use for any function. - RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijackScalar, {false}, 0 - RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijackObject, {false}, 0 - RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijackByref, {false}, 0 #ifdef FEATURE_GC_STRESS - RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, {true}, RhpGcStressHijackScalar - RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, {true}, RhpGcStressHijackObject - RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijackByref, {true}, RhpGcStressHijackByref -#endif - -;; -;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs. -;; -;; Register state on entry: -;; r0: scratch -;; r1: reference to the exception object. -;; r2: handler address we want to jump to. -;; Non-volatile registers are all already correct for return to the caller. 
-;; The stack is as if we are just about to returned from the call -;; -;; Register state on exit: -;; r0: reference to the exception object -;; r2: thread pointer -;; - MACRO - EHJumpProbeProlog - - PROLOG_PUSH {r1,r2} ; save the handler address so we can jump to it later (save r1 just for alignment) - PROLOG_NOP mov r0, r1 ; move the ex object reference into r0 so we can report it - ALLOC_PROBE_FRAME - - ;; r2 <- GetThread(), TRASHES r1 - INLINE_GETTHREAD r2, r1 - - ;; Recover the original return address and update the frame - ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP] - - ;; ClearHijackState expects thread in r2 (trashes r12). - ClearHijackState - - ; TRASHES r1 - INIT_PROBE_FRAME r2, r1, #PROBE_SAVE_FLAGS_R0_IS_GCREF, (PROBE_FRAME_SIZE + 8) - str sp, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - MEND - -;; -;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the -;; final jump to the handler for EH jump probe funcs. -;; -;; Register state on entry: -;; r0: reference to the exception object -;; r1-r3: scratch -;; -;; Register state on exit: -;; sp: correct for return to the caller -;; r1: reference to the exception object -;; - MACRO - EHJumpProbeEpilog - - FREE_PROBE_FRAME ; This restores exception object back into r0 - EPILOG_NOP mov r1, r0 ; Move the Exception object back into r1 where the catch handler expects it - EPILOG_POP {r0,pc} ; Recover the handler address and jump to it - MEND - -;; -;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete. -;; -;; Register state on entry: -;; r0: reference to the exception object. -;; r2: thread -;; Non-volatile registers are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (lr points to return address). 
-;; -;; Register state on exit: -;; r7: previous frame pointer -;; r0: reference to the exception object -;; - NESTED_ENTRY RhpGCProbeForEHJump - EHJumpProbeProlog - -#ifdef _DEBUG - ;; - ;; If we get here, then we have been hijacked for a real GC, and our SyncState must - ;; reflect that we've been requested to synchronize. - - ldr r1, =RhpTrapThreads - ldr r1, [r1] - tst r1, #TrapThreadsFlags_TrapThreads - bne %0 - - bl RhDebugBreak -0 -#endif ;; _DEBUG - - mov r4, r2 - WaitForGCCompletion - - EHJumpProbeEpilog - NESTED_END RhpGCProbeForEHJump - -#ifdef FEATURE_GC_STRESS -;; -;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. -;; -;; Register state on entry: -;; r1: reference to the exception object. -;; r2: thread -;; Non-volatile registers are all already correct for return to the caller. -;; The stack is as if we have tail called to this function (lr points to return address). -;; -;; Register state on exit: -;; r7: previous frame pointer -;; r0: reference to the exception object -;; - NESTED_ENTRY RhpGCStressProbeForEHJump - EHJumpProbeProlog - - bl $REDHAWKGCINTERFACE__STRESSGC - - EHJumpProbeEpilog - NESTED_END RhpGCStressProbeForEHJump ;; ;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. 
diff --git a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm index 26c2055fd79c2..234b50c117bad 100644 --- a/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm/PInvoke.asm @@ -5,34 +5,4 @@ TEXTAREA -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -;; -;; -;; INPUT: r2: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGCNoAbort - - PROLOG_PUSH {r0-r6,lr} ; Even number of registers to maintain 8-byte stack alignment - PROLOG_VPUSH {d0-d3} ; Save float return value registers as well - - ldr r5, [r2, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - - ldr r0, [r5, #OFFSETOF__Thread__m_ThreadStateFlags] - tst r0, #TSF_DoNotTriggerGc - bne Done - - mov r0, r2 ; passing transition frame in r0 - bl RhpWaitForGC2 - -Done - EPILOG_VPOP {d0-d3} - EPILOG_POP {r0-r6,pc} - - NESTED_END RhpWaitForGCNoAbort - INLINE_GETTHREAD_CONSTANT_POOL diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index b232e380fb075..66c91deadbb90 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -187,25 +187,6 @@ __PPF_ThreadReg SETS "x2" EPILOG_RETURN MEND -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. 
-;; -;; Register state on entry: -;; x2: thread pointer -;; -;; Register state on exit: -;; - MACRO - ClearHijackState - - ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) - ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress - stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] - ;; Clear m_uHijackedReturnValueFlags - str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] - MEND - ;; ;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and ;; clears the hijack state. @@ -231,32 +212,15 @@ __PPF_ThreadReg SETS "x2" ;; Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags ldp lr, x12, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] - ClearHijackState - MEND - -;; -;; Set the Thread state and wait for a GC to complete. -;; -;; Register state on entry: -;; x4: thread pointer -;; -;; Register state on exit: -;; x4: thread pointer -;; All other registers trashed -;; - - EXTERN RhpWaitForGCNoAbort - - MACRO - WaitForGCCompletion - - ldr w2, [x4, #OFFSETOF__Thread__m_ThreadStateFlags] - tst w2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC - bne %ft0 + ;; + ;; Clear hijack state + ;; + ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8) + ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + ;; Clear m_uHijackedReturnValueFlags + str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] - ldr x9, [x4, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - bl RhpWaitForGCNoAbort -0 MEND MACRO @@ -337,8 +301,8 @@ __PPF_ThreadReg SETS "x2" NESTED_ENTRY RhpGcProbeRare PROLOG_PROBE_FRAME x2, x3, x12, - mov x4, x2 - WaitForGCCompletion + ldr x0, [x2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 ldr x2, [sp, 
#OFFSETOF__PInvokeTransitionFrame__m_Flags] tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1 diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S index 3c8ce2eafedea..8ed8a497d4abf 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.S @@ -18,42 +18,6 @@ TSF_Attached_Bit = 0 TSF_SuppressGcStress_Bit = 3 TSF_DoNotTriggerGc_Bit = 4 -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpWaitForGCNoAbort -// -// -// INPUT: x9: transition frame -// -// TRASHES: None -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler - - // FP and LR registers - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -0x40 // Push down stack pointer and store FP and LR - - // Save the integer return registers, as well as the floating return registers - stp x0, x1, [sp, #0x10] - stp d0, d1, [sp, #0x20] - stp d2, d3, [sp, #0x30] - - ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] - tbnz x0, #TSF_DoNotTriggerGc_Bit, Done - - mov x0, x9 // passing transition frame in x0 - bl RhpWaitForGC2 - -Done: - ldp x0, x1, [sp, #0x10] - ldp d0, d1, [sp, #0x20] - ldp d2, d3, [sp, #0x30] - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 0x40 - EPILOG_RETURN - - NESTED_END RhpWaitForGCNoAbort - // // RhpPInvoke // diff --git a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm index 27d40b911c50f..475737fea71a0 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/PInvoke.asm @@ -5,44 +5,6 @@ TEXTAREA -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -;; -;; -;; INPUT: x9: transition frame -;; -;; TRASHES: 
None -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - NESTED_ENTRY RhpWaitForGCNoAbort - - ;; FP and LR registers - PROLOG_SAVE_REG_PAIR fp, lr, #-0x40! ;; Push down stack pointer and store FP and LR - - ;; Save the integer return registers, as well as the floating return registers - stp x0, x1, [sp, #0x10] - stp d0, d1, [sp, #0x20] - stp d2, d3, [sp, #0x30] - - ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] - tbnz x0, #TSF_DoNotTriggerGc_Bit, Done - - mov x0, x9 ; passing transition frame in x0 - bl RhpWaitForGC2 - -Done - ldp x0, x1, [sp, #0x10] - ldp d0, d1, [sp, #0x20] - ldp d2, d3, [sp, #0x30] - EPILOG_RESTORE_REG_PAIR fp, lr, #0x40! - EPILOG_RETURN - - NESTED_END RhpWaitForGCNoAbort - - INLINE_GETTHREAD_CONSTANT_POOL - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; RhpPInvoke diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm index c27c6645a2c6a..578e39d59f340 100644 --- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm @@ -13,20 +13,6 @@ include AsmMacros.inc DEFAULT_PROBE_SAVE_FLAGS equ PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF -;; -;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this -;; thread if it finds it at an IP that isn't managed code. 
-;; -;; Register state on entry: -;; EDX: thread pointer -;; -;; Register state on exit: -;; No changes -;; -ClearHijackState macro - mov dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 - mov dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 -endm ;; ;; The prolog for all GC suspension hijackes (normal and stress). Sets up an EBP frame, @@ -58,7 +44,12 @@ HijackFixupProlog macro mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] mov [ebp + 4], ecx - ClearHijackState + ;; + ;; Clear hijack state + ;; + mov dword ptr [edx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], 0 + mov dword ptr [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress], 0 + endm ;; @@ -145,33 +136,6 @@ PopProbeFrame macro pop eax endm -;; -;; Set the Thread state and wait for a GC to complete. -;; -;; Register state on entry: -;; ESP: pointer to a PInvokeTransitionFrame on the stack -;; EBX: thread pointer -;; EBP: EBP frame -;; -;; Register state on exit: -;; ESP: pointer to a PInvokeTransitionFrame on the stack -;; EBX: thread pointer -;; EBP: EBP frame -;; All other registers trashed -;; - -EXTERN _RhpWaitForGCNoAbort : PROC - -WaitForGCCompletion macro - test dword ptr [ebx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz @F - - mov ecx, esp - call _RhpWaitForGCNoAbort -@@: - -endm - RhpThrowHwEx equ @RhpThrowHwEx@0 extern RhpThrowHwEx : proc @@ -198,7 +162,8 @@ RhpGcProbe proc SynchronousRendezVous: PushProbeFrame ecx ; bitmask in ECX - WaitForGCCompletion + mov ecx, esp + call RhpWaitForGC2 mov edx, [esp + OFFSETOF__PInvokeTransitionFrame__m_Flags] ;; @@ -356,201 +321,6 @@ FASTCALL_FUNC RhpHijackForGcStress, 0 pop ebp ret FASTCALL_ENDFUNC -endif ;; FEATURE_GC_STRESS - -;; -;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH -;; dispatch. These are needed to properly coordinate with the GC hijacking logic. 
We are essentially replacing -;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of -;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the -;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be -;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the -;; handler in the caller. -;; -;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to -;; complete. There are also variants for GC stress. -;; -;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to -;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack -;; another thread while the target thread is suspended in managed code, and this is _not_ managed code. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: what ESP should be after the return address and arg space are removed. -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack still contains the return address and the arguments to the call. -;; -;; Register state on exit: -;; ESP: what it would be after a complete return to the caller. -;; -RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName -FASTCALL_FUNC funcName, 0 - cmp [esp], hijackFuncName - je RhpGCProbeForEHJump - -IF isStress EQ 1 - cmp [esp], stressFuncName - je RhpGCStressProbeForEHJump -ENDIF - - ;; We are not hijacked, so we can return to the handler. - ;; We return to keep the call/return prediction balanced. - mov esp, edx ; The stack is now as if we have returned from the call. - push eax ; Push the handler as the return address. 
- ret - -FASTCALL_ENDFUNC -endm - - -;; We need an instance of the helper for each possible hijack function. The binder has enough -;; information to determine which one we need to use for any function. -RTU_EH_JUMP_HELPER RhpEHJumpScalar, @RhpGcProbeHijackScalar@0, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpObject, @RhpGcProbeHijackObject@0, 0, 0 -RTU_EH_JUMP_HELPER RhpEHJumpByref, @RhpGcProbeHijackByref@0, 0, 0 -ifdef FEATURE_GC_STRESS -RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, @RhpGcProbeHijackScalar@0, 1, @RhpGcStressHijackScalar@0 -RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, @RhpGcProbeHijackObject@0, 1, @RhpGcStressHijackObject@0 -RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, @RhpGcProbeHijackByref@0, 1, @RhpGcStressHijackByref@0 -endif - -;; -;; Macro to setup our EBP frame and adjust the location of the EH object reference for EH jump probe funcs. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: scratch -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack is as if we have returned from the call -;; -;; Register state on exit: -;; ESP: ebp frame -;; EBP: ebp frame setup with space reserved for the repaired return address -;; EAX: reference to the exception object -;; ECX: scratch -;; -EHJumpProbeProlog macro - push eax ; save a slot for the repaired return address - push ebp ; setup an ebp frame to keep the stack nicely crawlable - mov ebp, esp - push eax ; save the handler address so we can jump to it later - mov eax, ecx ; move the ex object reference into eax so we can report it -endm - -;; -;; Macro to re-adjust the location of the EH object reference, cleanup the EBP frame, and make the -;; final jump to the handler for EH jump probe funcs. 
-;; -;; Register state on entry: -;; EAX: reference to the exception object -;; ESP: ebp frame -;; EBP: ebp frame setup with the correct return (handler) address -;; ECX: scratch -;; EDX: scratch -;; -;; Register state on exit: -;; ESP: correct for return to the caller -;; EBP: previous ebp frame -;; ECX: reference to the exception object -;; EDX: trashed -;; -EHJumpProbeEpilog macro - mov ecx, eax ; Put the EX obj ref back into ecx for the handler. - pop eax ; Recover the handler address. - pop ebp ; Pop the ebp frame we setup. - pop edx ; Pop the original return address, which we do not need. - push eax ; Push the handler as the return address. - ret -endm - -;; -;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: what ESP should be after the return address and arg space are removed. -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack is as if we have returned from the call -;; -;; Register state on exit: -;; ESP: correct for return to the caller -;; EBP: previous ebp frame -;; ECX: reference to the exception object -;; -RhpGCProbeForEHJump proc - mov esp, edx ; The stack is now as if we have returned from the call. - EHJumpProbeProlog - - ;; edx <- GetThread(), TRASHES ecx - INLINE_GETTHREAD edx, ecx - - ;; Fix the stack by pushing the original return address - mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] - mov [ebp + 4], ecx - - ClearHijackState - -ifdef _DEBUG - ;; - ;; If we get here, then we have been hijacked for a real GC, and our SyncState must - ;; reflect that we've been requested to synchronize. 
- - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @F - - call RhDebugBreak -@@: -endif ;; _DEBUG - - - PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF - WaitForGCCompletion - PopProbeFrame - - EHJumpProbeEpilog - -RhpGCProbeForEHJump endp - -ifdef FEATURE_GC_STRESS -;; -;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper. -;; -;; Register state on entry: -;; EAX: handler address we want to jump to. -;; ECX: reference to the exception object. -;; EDX: what ESP should be after the return address and arg space are removed. -;; EBX, ESI, EDI, and EBP are all already correct for return to the caller. -;; The stack is as if we have returned from the call -;; -;; Register state on exit: -;; ESP: correct for return to the caller -;; EBP: previous ebp frame -;; ECX: reference to the exception object -;; -RhpGCStressProbeForEHJump proc - mov esp, edx ; The stack is now as if we have returned from the call. - EHJumpProbeProlog - - ;; edx <- GetThread(), TRASHES ecx - INLINE_GETTHREAD edx, ecx - - ;; Fix the stack by pushing the original return address - mov ecx, [edx + OFFSETOF__Thread__m_pvHijackedReturnAddress] - mov [ebp + 4], ecx - - ClearHijackState - - PushProbeFrame PROBE_SAVE_FLAGS_RAX_IS_GCREF - StressGC - PopProbeFrame - - EHJumpProbeEpilog - -RhpGCStressProbeForEHJump endp - endif ;; FEATURE_GC_STRESS end diff --git a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm index cf476de49309c..90f0d083a842a 100644 --- a/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/i386/PInvoke.asm @@ -9,39 +9,4 @@ include AsmMacros.inc -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; RhpWaitForGCNoAbort -;; -;; -;; INPUT: ECX: transition frame -;; -;; OUTPUT: -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
-_RhpWaitForGCNoAbort proc public - push ebp - mov ebp, esp - push eax - push edx - push ebx - push esi - - mov esi, [ecx + OFFSETOF__PInvokeTransitionFrame__m_pThread] - - test dword ptr [esi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc - jnz Done - - ; passing transition frame pointer in ecx - call RhpWaitForGC2 - -Done: - pop esi - pop ebx - pop edx - pop eax - pop ebp - ret -_RhpWaitForGCNoAbort endp - end From eaf6efffa70cf86f2d927abfd521c40288be3e2b Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 10:30:17 -0700 Subject: [PATCH 04/17] remove unused extraStack parameter to PUSH_PROBE_FRAME --- .../nativeaot/Runtime/amd64/GcProbe.asm | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index f114c17655326..196ac73ba4d01 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -14,16 +14,15 @@ PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + P ;; - BITMASK: bitmask describing pushes, may be volatile register or constant value ;; - RAX: managed function return value, may be an object or byref ;; - preserved regs: need to stay preserved, may contain objects or byrefs -;; - extraStack bytes of stack have already been allocated ;; ;; INVARIANTS ;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup. ;; - All preserved registers remain unchanged from their values in managed code. ;; -PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK +PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK push_vol_reg rax ; save RAX, it might contain an objectref - lea trashReg, [rsp + 10h + extraStack] + lea trashReg, [rsp + 10h] push_vol_reg trashReg ; save caller's RSP push_nonvol_reg r15 ; save preserved registers push_nonvol_reg r14 ; .. 
@@ -35,12 +34,12 @@ PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK push_vol_reg BITMASK ; save the register bitmask passed in by caller push_vol_reg threadReg ; Thread * (unused by stackwalker) push_nonvol_reg rbp ; save caller's RBP - mov trashReg, [rsp + 12*8 + extraStack] ; Find the return address + mov trashReg, [rsp + 12*8] ; Find the return address push_vol_reg trashReg ; save m_RIP lea trashReg, [rsp + 0] ; trashReg == address of frame ;; allocate scratch space and any required alignment - alloc_stack 20h + 10h + (extraStack AND (10h-1)) + alloc_stack 20h + 10h ;; save xmm0 in case it's being used as a return value movdqa [rsp + 20h], xmm0 @@ -54,11 +53,9 @@ endm ;; registers and return value to their values from before the probe was called (while also updating any ;; object refs or byrefs). ;; -;; NOTE: does NOT deallocate the 'extraStack' portion of the stack, the user of this macro must do that. -;; -POP_PROBE_FRAME macro extraStack +POP_PROBE_FRAME macro movdqa xmm0, [rsp + 20h] - add rsp, 20h + 10h + (extraStack AND (10h-1)) + 8 + add rsp, 20h + 10h + 8 ; deallocate stack and discard saved m_RIP pop rbp pop rax ; discard Thread* pop rax ; discard BITMASK @@ -171,12 +168,12 @@ LEAF_END RhpGcStressHijackByref, _TEXT ;; All other registers restored as they were when the hijack was first reached. 
;; NESTED_ENTRY RhpGcStressProbe, _TEXT - PUSH_PROBE_FRAME rdx, rax, 0, rcx + PUSH_PROBE_FRAME rdx, rax, rcx END_PROLOGUE call REDHAWKGCINTERFACE__STRESSGC - POP_PROBE_FRAME 0 + POP_PROBE_FRAME ret NESTED_END RhpGcStressProbe, _TEXT @@ -189,7 +186,7 @@ NESTED_ENTRY RhpGcProbe, _TEXT jnz @f ret @@: - PUSH_PROBE_FRAME rdx, rax, 0, rcx + PUSH_PROBE_FRAME rdx, rax, rcx END_PROLOGUE mov rbx, rdx @@ -199,10 +196,10 @@ NESTED_ENTRY RhpGcProbe, _TEXT mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT jnz Abort - POP_PROBE_FRAME 0 + POP_PROBE_FRAME ret Abort: - POP_PROBE_FRAME 0 + POP_PROBE_FRAME mov rcx, STATUS_REDHAWK_THREAD_ABORT pop rdx ;; return address as exception RIP jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception From 1f29db1d1eaa6d4d61e500750941864152bb3ba0 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 12:08:29 -0700 Subject: [PATCH 05/17] make all working variants of RhpGcProbeHijack to have the same shape --- src/coreclr/nativeaot/Runtime/amd64/GcProbe.S | 4 +- .../nativeaot/Runtime/amd64/GcProbe.asm | 156 +++++++----------- .../nativeaot/Runtime/arm64/GcProbe.asm | 71 ++++---- src/coreclr/nativeaot/Runtime/thread.cpp | 74 ++------- src/coreclr/nativeaot/Runtime/thread.h | 9 +- 5 files changed, 114 insertions(+), 200 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index ee1dab36391d2..ab917f3a86e14 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -80,7 +80,7 @@ // RAX, RDX preserved, other volatile regs trashed // .macro FixupHijackedCallstack - // preserve RAX, RDX as they may contain retuvalues + // preserve RAX, RDX as they may contain return values push rax push rdx @@ -97,6 +97,7 @@ mov rcx, [r11 + 
OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx + // Fetch the return address flags mov rcx, [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags] // @@ -155,7 +156,6 @@ LEAF_ENTRY RhpGcPoll, _TEXT ret LOCAL_LABEL(RhpGcPoll_RarePath): jmp C_FUNC(RhpGcPollRare) - LEAF_END RhpGcPoll, _TEXT NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index 196ac73ba4d01..bbde9e632b817 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -78,8 +78,9 @@ endm ;; All registers correct for return to the original return address. ;; ;; Register state on exit: -;; RCX: trashed ;; RDX: thread pointer +;; RCX: return value flags +;; RAX: preserved, other volatile regs trashed ;; FixupHijackedCallstack macro @@ -92,12 +93,16 @@ FixupHijackedCallstack macro mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx + ;; Fetch the return address flags + mov rcx, [rdx + OFFSETOF__Thread__m_uHijackedReturnValueFlags] + ;; ;; Clear hijack state ;; - xor ecx, ecx - mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx - mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx + xor r9, r9 + mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 + mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 + mov [rdx + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 endm @@ -106,53 +111,74 @@ EXTERN RhpPInvokeExceptionGuard : PROC ;; ;; ;; -;; GC Probe Hijack targets +;; GC Probe Hijack target ;; + ;; -NESTED_ENTRY RhpGcProbeHijackScalar, _TEXT, RhpPInvokeExceptionGuard +NESTED_ENTRY RhpGcProbeHijack, _TEXT, RhpPInvokeExceptionGuard END_PROLOGUE FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcProbe -NESTED_END RhpGcProbeHijackScalar, _TEXT +NESTED_END RhpGcProbeHijack, _TEXT + +EXTERN RhpThrowHwEx : PROC 
-NESTED_ENTRY RhpGcProbeHijackObject, _TEXT, RhpPInvokeExceptionGuard +NESTED_ENTRY RhpGcProbe, _TEXT + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @f + ret +@@: + PUSH_PROBE_FRAME rdx, rax, rcx END_PROLOGUE - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcProbe -NESTED_END RhpGcProbeHijackObject, _TEXT -NESTED_ENTRY RhpGcProbeHijackByref, _TEXT, RhpPInvokeExceptionGuard + mov rbx, rdx + mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + call RhpWaitForGC2 + + mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jnz Abort + POP_PROBE_FRAME + ret +Abort: + POP_PROBE_FRAME + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx ;; return address as exception RIP + jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + +NESTED_END RhpGcProbe, _TEXT + +LEAF_ENTRY RhpGcPoll, _TEXT + cmp [RhpTrapThreads], TrapThreadsFlags_None + jne @F ; forward branch - predicted not taken + ret +@@: + jmp RhpGcPollRare +LEAF_END RhpGcPoll, _TEXT + +NESTED_ENTRY RhpGcPollRare, _TEXT + PUSH_COOP_PINVOKE_FRAME rcx END_PROLOGUE - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF - jmp RhpGcProbe -NESTED_END RhpGcProbeHijackByref, _TEXT + call RhpGcPoll2 + POP_COOP_PINVOKE_FRAME + ret +NESTED_END RhpGcPollRare, _TEXT + + ifdef FEATURE_GC_STRESS + ;; ;; ;; GC Stress Hijack targets ;; ;; -LEAF_ENTRY RhpGcStressHijackScalar, _TEXT - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS - jmp RhpGcStressProbe -LEAF_END RhpGcStressHijackScalar, _TEXT - -LEAF_ENTRY RhpGcStressHijackObject, _TEXT - FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcStressProbe -LEAF_END RhpGcStressHijackObject, _TEXT - -LEAF_ENTRY RhpGcStressHijackByref, _TEXT +LEAF_ENTRY RhpGcStressHijack, _TEXT 
FixupHijackedCallstack - mov ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcStressProbe -LEAF_END RhpGcStressHijackByref, _TEXT +LEAF_END RhpGcStressHijack, _TEXT ;; ;; Worker for our GC stress probes. Do not call directly!! @@ -177,37 +203,6 @@ NESTED_ENTRY RhpGcStressProbe, _TEXT ret NESTED_END RhpGcStressProbe, _TEXT -endif ;; FEATURE_GC_STRESS - -EXTERN RhpThrowHwEx : PROC - -NESTED_ENTRY RhpGcProbe, _TEXT - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @f - ret -@@: - PUSH_PROBE_FRAME rdx, rax, rcx - END_PROLOGUE - - mov rbx, rdx - mov rcx, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - call RhpWaitForGC2 - - mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] - test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT - jnz Abort - POP_PROBE_FRAME - ret -Abort: - POP_PROBE_FRAME - mov rcx, STATUS_REDHAWK_THREAD_ABORT - pop rdx ;; return address as exception RIP - jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception - -NESTED_END RhpGcProbe, _TEXT - - -ifdef FEATURE_GC_STRESS ;; PAL_LIMITED_CONTEXT, 6 xmm regs to save, 2 scratch regs to save, plus 20h bytes for scratch space RhpHijackForGcStress_FrameSize equ SIZEOF__PAL_LIMITED_CONTEXT + 6*10h + 2*8h + 20h @@ -301,25 +296,14 @@ NESTED_ENTRY RhpHijackForGcStress, _TEXT ret NESTED_END RhpHijackForGcStress, _TEXT -endif ;; FEATURE_GC_STRESS - - -;; -ifdef FEATURE_GC_STRESS - g_pTheRuntimeInstance equ ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA EXTERN g_pTheRuntimeInstance : QWORD RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC -endif ;; FEATURE_GC_STRESS - EXTERN g_fGcStressStarted : DWORD EXTERN g_fHasFastFxsave : BYTE -FXSAVE_SIZE equ 512 - -ifdef FEATURE_GC_STRESS ;; ;; INVARIANT: Don't trash the argument registers, the 
binder codegen depends on this. ;; @@ -330,29 +314,9 @@ LEAF_ENTRY RhpSuppressGcStress, _TEXT ret LEAF_END RhpSuppressGcStress, _TEXT -endif ;; FEATURE_GC_STRESS -LEAF_ENTRY RhpGcPoll, _TEXT - - cmp [RhpTrapThreads], TrapThreadsFlags_None - jne @F ; forward branch - predicted not taken - ret -@@: - jmp RhpGcPollRare - -LEAF_END RhpGcPoll, _TEXT - -NESTED_ENTRY RhpGcPollRare, _TEXT - - PUSH_COOP_PINVOKE_FRAME rcx - END_PROLOGUE - - call RhpGcPoll2 - - POP_COOP_PINVOKE_FRAME +endif ;; FEATURE_GC_STRESS - ret -NESTED_END RhpGcPollRare, _TEXT end diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 66c91deadbb90..0075023cc849f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -239,7 +239,7 @@ __PPF_ThreadReg SETS "x2" ;; ;; ;; -;; GC Probe Hijack targets +;; GC Probe Hijack target ;; ;; EXTERN RhpPInvokeExceptionGuard @@ -254,41 +254,6 @@ __PPF_ThreadReg SETS "x2" b RhpGcProbe NESTED_END RhpGcProbeHijackWrapper -#ifdef FEATURE_GC_STRESS -;; -;; -;; GC Stress Hijack targets -;; -;; - LEAF_ENTRY RhpGcStressHijack - FixupHijackedCallstack - orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcStressProbe - LEAF_END RhpGcStressHijack -;; -;; Worker for our GC stress probes. Do not call directly!! -;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. -;; This worker performs the GC Stress work and returns to the original return address. -;; -;; Register state on entry: -;; x0: hijacked function return value -;; x1: hijacked function return value -;; x2: thread pointer -;; w12: register bitmask -;; -;; Register state on exit: -;; Scratch registers, except for x0, have been trashed -;; All other registers restored as they were when the hijack was first reached. 
-;; - NESTED_ENTRY RhpGcStressProbe - PROLOG_PROBE_FRAME x2, x3, x12, - - bl $REDHAWKGCINTERFACE__STRESSGC - - EPILOG_PROBE_FRAME - NESTED_END RhpGcStressProbe -#endif ;; FEATURE_GC_STRESS - LEAF_ENTRY RhpGcProbe ldr x3, =RhpTrapThreads ldr w3, [x3] @@ -330,7 +295,41 @@ __PPF_ThreadReg SETS "x2" ret NESTED_END RhpGcPollRare + #ifdef FEATURE_GC_STRESS +;; +;; +;; GC Stress Hijack target +;; +;; + LEAF_ENTRY RhpGcStressHijack + FixupHijackedCallstack + orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcStressProbe + LEAF_END RhpGcStressHijack +;; +;; Worker for our GC stress probes. Do not call directly!! +;; Instead, go through RhpGcStressHijack. +;; This worker performs the GC Stress work and returns to the original return address. +;; +;; Register state on entry: +;; x0: hijacked function return value +;; x1: hijacked function return value +;; x2: thread pointer +;; w12: register bitmask +;; +;; Register state on exit: +;; Scratch registers, except for x0, have been trashed +;; All other registers restored as they were when the hijack was first reached.
+;; + NESTED_ENTRY RhpGcStressProbe + PROLOG_PROBE_FRAME x2, x3, x12, + + bl $REDHAWKGCINTERFACE__STRESSGC + + EPILOG_PROBE_FRAME + NESTED_END RhpGcStressProbe + NESTED_ENTRY RhpHijackForGcStress ;; This function should be called from right before epilog diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index d3bc792639c30..706718c89d4a4 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -544,63 +544,17 @@ void Thread::GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, St #ifndef DACCESS_COMPILE EXTERN_C void FASTCALL RhpSuspendRedirected(); - -#if defined(TARGET_ARM64) || defined(TARGET_UNIX) EXTERN_C void FASTCALL RhpGcProbeHijack(); - -static void* NormalHijackTargets[1] = -{ - reinterpret_cast(RhpGcProbeHijack) -}; -#else // TARGET_ARM64 || TARGET_UNIX -EXTERN_C void FASTCALL RhpGcProbeHijackScalar(); -EXTERN_C void FASTCALL RhpGcProbeHijackObject(); -EXTERN_C void FASTCALL RhpGcProbeHijackByref(); - -static void* NormalHijackTargets[3] = -{ - reinterpret_cast(RhpGcProbeHijackScalar), // GCRK_Scalar = 0, - reinterpret_cast(RhpGcProbeHijackObject), // GCRK_Object = 1, - reinterpret_cast(RhpGcProbeHijackByref) // GCRK_Byref = 2, -}; -#endif // TARGET_ARM64 || TARGET_UNIX - -#ifdef FEATURE_GC_STRESS -#ifndef TARGET_ARM64 -EXTERN_C void FASTCALL RhpGcStressHijackScalar(); -EXTERN_C void FASTCALL RhpGcStressHijackObject(); -EXTERN_C void FASTCALL RhpGcStressHijackByref(); - -static void* GcStressHijackTargets[3] = -{ - reinterpret_cast(RhpGcStressHijackScalar), // GCRK_Scalar = 0, - reinterpret_cast(RhpGcStressHijackObject), // GCRK_Object = 1, - reinterpret_cast(RhpGcStressHijackByref) // GCRK_Byref = 2, -}; -#else // TARGET_ARM64 EXTERN_C void FASTCALL RhpGcStressHijack(); -static void* GcStressHijackTargets[1] = -{ - reinterpret_cast(RhpGcStressHijack) -}; -#endif // TARGET_ARM64 -#endif // FEATURE_GC_STRESS - // static -bool 
Thread::IsHijackTarget(void * address) +bool Thread::IsHijackTarget(void* address) { - for (size_t i = 0; i < ARRAY_SIZE(NormalHijackTargets); i++) - { - if (NormalHijackTargets[i] == address) + if (&RhpGcProbeHijack == address) return true; - } #ifdef FEATURE_GC_STRESS - for (size_t i = 0; i < ARRAY_SIZE(GcStressHijackTargets); i++) - { - if (GcStressHijackTargets[i] == address) + if (&RhpGcStressHijack == address) return true; - } #endif // FEATURE_GC_STRESS return false; } @@ -693,7 +647,7 @@ void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijac #endif //FEATURE_SUSPEND_REDIRECTION } - pThread->HijackReturnAddress(pThreadContext, NormalHijackTargets); + pThread->HijackReturnAddress(pThreadContext, &RhpGcProbeHijack); } #ifdef FEATURE_GC_STRESS @@ -734,7 +688,7 @@ void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) } if (bForceGC || pInstance->ShouldHijackCallsiteForGcStress(ip)) { - pCurrentThread->HijackReturnAddress(pSuspendCtx, GcStressHijackTargets); + pCurrentThread->HijackReturnAddress(pSuspendCtx, &RhpGcStressHijack); } } #endif // FEATURE_GC_STRESS @@ -742,7 +696,7 @@ void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) // This function is called from a thread to place a return hijack onto its own stack for GC stress cases // via Thread::HijackForGcStress above. The only constraint on the suspension is that the // stack be crawlable enough to yield the location of the return address. 
-void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijackTargets[]) +void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction) { if (IsDoNotTriggerGcSet()) return; @@ -753,7 +707,7 @@ void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijac return; } - HijackReturnAddressWorker(&frameIterator, pvHijackTargets); + HijackReturnAddressWorker(&frameIterator, pfnHijackFunction); } // This function is called in one of two scenarios: @@ -761,17 +715,17 @@ void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijac // thread is OS suspended at pSuspendCtx in managed code. // 2) from a thread to place a return hijack onto its own stack for GC suspension. In this case the target // thread is interrupted at pSuspendCtx in managed code via a signal or similar. -void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, void * pvHijackTargets[]) +void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction) { ASSERT(!IsDoNotTriggerGcSet()); StackFrameIterator frameIterator(this, pSuspendCtx); ASSERT(frameIterator.IsValid()); - HijackReturnAddressWorker(&frameIterator, pvHijackTargets); + HijackReturnAddressWorker(&frameIterator, pfnHijackFunction); } -void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* pvHijackTargets[]) +void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction) { PTR_PTR_VOID ppvRetAddrLocation; GCRefKind retValueKind; @@ -800,14 +754,8 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; m_pvHijackedReturnAddress = pvRetAddr; -#if defined(TARGET_ARM64) || defined(TARGET_UNIX) m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); - *ppvRetAddrLocation = pvHijackTargets[0]; -#else - void* pvHijackTarget = pvHijackTargets[retValueKind]; - 
ASSERT_MSG(IsHijackTarget(pvHijackTarget), "unexpected method used as hijack target"); - *ppvRetAddrLocation = pvHijackTarget; -#endif + *ppvRetAddrLocation = pfnHijackFunction; STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p\n", GetPalThreadIdForLogging(), frameIterator->GetRegisterSet()->GetIP()); diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 95cc8e5521c99..8b4b9a93b4907 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -142,10 +142,13 @@ class Thread : private ThreadBuffer void ClearState(ThreadStateFlags flags); bool IsStateSet(ThreadStateFlags flags); + static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack); - void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void * pvHijackTargets[]); - void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, void* pvHijackTargets[]); - void HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* pvHijackTargets[]); + + typedef void HijackFunc(); + void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); + void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); + void HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction); bool InlineSuspend(NATIVE_CONTEXT* interruptedContext); #ifdef FEATURE_SUSPEND_REDIRECTION From 7d66dd238576959c3440ae074f00c397dfef82a0 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:00:10 -0700 Subject: [PATCH 06/17] fix Unix build, tweak some comments --- src/coreclr/nativeaot/Runtime/amd64/GcProbe.S | 9 --------- src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm | 15 --------------- src/coreclr/nativeaot/Runtime/thread.cpp | 11 ++++------- src/coreclr/nativeaot/Runtime/thread.h | 14 ++++++-------- 4 files changed, 10 insertions(+), 39 deletions(-) diff --git 
a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index ab917f3a86e14..39996e727c55e 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -91,32 +91,23 @@ pop rdx pop rax - // // Fix the stack by pushing the original return address - // mov rcx, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx // Fetch the return address flags mov rcx, [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags] - // // Clear hijack state - // xor r9, r9 mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 mov [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 - .endm -// -// // // GC Probe Hijack target // -// - NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler END_PROLOGUE FixupHijackedCallstack diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index bbde9e632b817..e6ef827ebeb2c 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -3,9 +3,6 @@ include AsmMacros.inc -PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH -PROBE_SAVE_FLAGS_RAX_IS_GCREF equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - ;; ;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts the register ;; bitmask in RCX @@ -83,37 +80,27 @@ endm ;; RAX: preserved, other volatile regs trashed ;; FixupHijackedCallstack macro - ;; rdx <- GetThread(), TRASHES rcx INLINE_GETTHREAD rdx, rcx - ;; ;; Fix the stack by pushing the original return address - ;; mov rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress] push rcx ;; Fetch the return address flags mov rcx, [rdx + OFFSETOF__Thread__m_uHijackedReturnValueFlags] - ;; ;; Clear hijack state - ;; xor r9, r9 mov [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 
mov [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 mov [rdx + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 - endm EXTERN RhpPInvokeExceptionGuard : PROC -;; -;; ;; ;; GC Probe Hijack target -;; - ;; NESTED_ENTRY RhpGcProbeHijack, _TEXT, RhpPInvokeExceptionGuard END_PROLOGUE @@ -169,11 +156,9 @@ NESTED_END RhpGcPollRare, _TEXT ifdef FEATURE_GC_STRESS -;; ;; ;; GC Stress Hijack targets ;; -;; LEAF_ENTRY RhpGcStressHijack, _TEXT FixupHijackedCallstack or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 706718c89d4a4..c8c6acea6b33e 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -727,7 +727,7 @@ void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHij void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction) { - PTR_PTR_VOID ppvRetAddrLocation; + void** ppvRetAddrLocation; GCRefKind retValueKind; frameIterator->CalculateCurrentMethodState(); @@ -738,14 +738,11 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack { ASSERT(ppvRetAddrLocation != NULL); - // check if hijack location is the same + // if the new hijack location is the same, we do nothing if (m_ppvHijackedReturnAddressLocation == ppvRetAddrLocation) return; - // ARM64 epilogs have a window between loading the hijackable return address into LR and the RET instruction. - // We cannot hijack or unhijack a thread while it is suspended in that window unless we implement hijacking - // via LR register modification. Therefore it is important to check our ability to hijack the thread before - // unhijacking it. + // we only unhijack if we are going to install a new or better hijack. 
CrossThreadUnhijack(); void* pvRetAddr = *ppvRetAddrLocation; @@ -755,7 +752,7 @@ void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, Hijack m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; m_pvHijackedReturnAddress = pvRetAddr; m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); - *ppvRetAddrLocation = pfnHijackFunction; + *ppvRetAddrLocation = (void*)pfnHijackFunction; STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p\n", GetPalThreadIdForLogging(), frameIterator->GetRegisterSet()->GetIP()); diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 8b4b9a93b4907..a09ddd0ae02c0 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -70,21 +70,14 @@ struct ThreadBuffer { uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; uint32_t volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum -#if DACCESS_COMPILE - volatile PInvokeTransitionFrame* m_pTransitionFrame; -#else - PInvokeTransitionFrame* m_pTransitionFrame; -#endif PInvokeTransitionFrame* m_pDeferredTransitionFrame; // see Thread::EnablePreemptiveMode PInvokeTransitionFrame* m_pCachedTransitionFrame; PTR_Thread m_pNext; // used by ThreadStore's SList HANDLE m_hPalThread; // WARNING: this may legitimately be INVALID_HANDLE_VALUE void ** m_ppvHijackedReturnAddressLocation; void * m_pvHijackedReturnAddress; -#ifdef HOST_64BIT - uintptr_t m_uHijackedReturnValueFlags; // used on ARM64 and UNIX only; however, ARM64 and AMD64 share field offsets -#endif // HOST_64BIT + uintptr_t m_uHijackedReturnValueFlags; PTR_ExInfo m_pExInfoStackHead; Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort PTR_PTR_VOID m_pThreadLocalModuleStatics; @@ -145,7 +138,12 @@ class Thread : private ThreadBuffer static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack); + // + // Hijack funcs 
are not called, they are "returned to". And when done, they return to the actual caller. + // Thus they cannot have any parameters or return anything. + // typedef void HijackFunc(); + void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); void HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction); From 23fdd0a612deee9d4aef72e6339d9a6e859fe2b9 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:47:33 -0700 Subject: [PATCH 07/17] check the trap flag in RhpGcProbeHijack --- src/coreclr/nativeaot/Runtime/amd64/GcProbe.S | 10 ++++++---- src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index 39996e727c55e..6db688d869911 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -111,15 +111,17 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler END_PROLOGUE FixupHijackedCallstack + + test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads + jnz LOCAL_LABEL(DoRhpGcProbe) + ret + +LOCAL_LABEL(DoRhpGcProbe): or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RDX jmp C_FUNC(RhpGcProbe) NESTED_END RhpGcProbeHijack, _TEXT NESTED_ENTRY RhpGcProbe, _TEXT, NoHandler - test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads - jnz LOCAL_LABEL(RhpGcProbe_Trap) - ret -LOCAL_LABEL(RhpGcProbe_Trap): PUSH_PROBE_FRAME r11, rax, rcx END_PROLOGUE diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index e6ef827ebeb2c..f8cce7a9a2f1c 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -105,6 +105,11 @@ 
EXTERN RhpPInvokeExceptionGuard : PROC NESTED_ENTRY RhpGcProbeHijack, _TEXT, RhpPInvokeExceptionGuard END_PROLOGUE FixupHijackedCallstack + + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz @f + ret +@@: or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcProbe NESTED_END RhpGcProbeHijack, _TEXT @@ -112,10 +117,6 @@ NESTED_END RhpGcProbeHijack, _TEXT EXTERN RhpThrowHwEx : PROC NESTED_ENTRY RhpGcProbe, _TEXT - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz @f - ret -@@: PUSH_PROBE_FRAME rdx, rax, rcx END_PROLOGUE From 8c23ae6ae31b7c713ce53677783c392339671a3d Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 15:09:03 -0700 Subject: [PATCH 08/17] add a stub for RhpGcStressHijack on Unix --- src/coreclr/nativeaot/Runtime/portable.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index af11b2acf8a09..bb540d11588bd 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -406,15 +406,7 @@ void * ReturnFromCallDescrThunk; // Return address hijacking // #if !defined (HOST_ARM64) -COOP_PINVOKE_HELPER(void, RhpGcStressHijackScalar, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -COOP_PINVOKE_HELPER(void, RhpGcStressHijackObject, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -COOP_PINVOKE_HELPER(void, RhpGcStressHijackByref, ()) +COOP_PINVOKE_HELPER(void, RhpGcStressHijack, ()) { ASSERT_UNCONDITIONALLY("NYI"); } From 8faffe278a13f4dd3132bf1e4723cd049060c7a6 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 17:25:21 -0700 Subject: [PATCH 09/17] save flags for windows x64 should not have rdx --- src/coreclr/nativeaot/Runtime/ICodeManager.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index 
3477ba1f932ca..b3c1e5f39d5e6 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -79,7 +79,11 @@ inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) if (returnKind == GCRK_Scalar) return 0; +#if defined(TARGET_UNIX) return PTFF_SAVE_RAX | PTFF_SAVE_RDX | ((uint64_t)returnKind << 16); +#else + return PTFF_SAVE_RAX | ((uint64_t)returnKind << 16); +#endif } inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) From 75a01b902522dd5ada66d227ed8efecfd3eeafdd Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 20 Jul 2022 17:59:42 -0700 Subject: [PATCH 10/17] not saving scratch registers on win-arm64 --- .../nativeaot/Runtime/arm64/GcProbe.asm | 150 +++++------------- 1 file changed, 40 insertions(+), 110 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 0075023cc849f..81755f15a2434 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -8,17 +8,14 @@ EXTERN RhpGcPoll2 EXTERN g_fGcStressStarted -PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH + PTFF_SAVE_LR - ;; Build a map of symbols representing offsets into the transition frame (see PInvokeTransitionFrame in ;; rhbinder.h) and keep these two in sync. map 0 field OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs field 10 * 8 ; x19..x28 m_CallersSP field 8 ; SP at routine entry - field 19 * 8 ; x0..x18 - field 8 ; lr -m_SavedNZCV field 8 ; Saved condition flags + field 2 * 8 ; x0..x1 + field 8 ; alignment padding field 4 * 8 ; d0..d3 PROBE_FRAME_SIZE field 0 @@ -29,20 +26,13 @@ PROBE_FRAME_SIZE field 0 ;; defined below. 
For the special cases where additional work has to be done in the prolog we also provide ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control ;; to be asserted. - ;; - ;; Note that we currently employ a significant simplification of frame setup: we always allocate a - ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can - ;; lead to up to 20 additional register saves (x0-x18, lr) or 160 bytes of stack space. I have done no - ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the - ;; additional saves will show any measurable degradation. - ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro ;; can only be called from within the prolog). MACRO - ALLOC_PROBE_FRAME $extraStackSpace, $saveFPRegisters + ALLOC_PROBE_FRAME ;; First create PInvokeTransitionFrame - PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE + $extraStackSpace)! ;; Push down stack pointer and store FP and LR + PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE)! 
;; Push down stack pointer and store FP and LR ;; Slot at [sp, #0x10] is reserved for Thread * ;; Slot at [sp, #0x18] is reserved for bitmask of saved registers @@ -56,52 +46,30 @@ PROBE_FRAME_SIZE field 0 ;; Slot at [sp, #0x70] is reserved for caller sp - ;; Save the scratch registers + ;; Save the integer return registers PROLOG_NOP str x0, [sp, #0x78] - PROLOG_NOP stp x1, x2, [sp, #0x80] - PROLOG_NOP stp x3, x4, [sp, #0x90] - PROLOG_NOP stp x5, x6, [sp, #0xA0] - PROLOG_NOP stp x7, x8, [sp, #0xB0] - PROLOG_NOP stp x9, x10, [sp, #0xC0] - PROLOG_NOP stp x11, x12, [sp, #0xD0] - PROLOG_NOP stp x13, x14, [sp, #0xE0] - PROLOG_NOP stp x15, x16, [sp, #0xF0] - PROLOG_NOP stp x17, x18, [sp, #0x100] - PROLOG_NOP str lr, [sp, #0x110] - - ;; Slot at [sp, #0x118] is reserved for NZCV + PROLOG_NOP str x1, [sp, #0x80] + + ;; Slot at [sp, #0x88] is alignment padding ;; Save the floating return registers - IF $saveFPRegisters - PROLOG_NOP stp d0, d1, [sp, #0x120] - PROLOG_NOP stp d2, d3, [sp, #0x130] - ENDIF + PROLOG_NOP stp d0, d1, [sp, #0x90] + PROLOG_NOP stp d2, d3, [sp, #0xA0] MEND ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all ;; registers are restored (apart for sp and pc), even volatiles. 
MACRO - FREE_PROBE_FRAME $extraStackSpace, $restoreFPRegisters + FREE_PROBE_FRAME - ;; Restore the scratch registers + ;; Restore the integer return registers PROLOG_NOP ldr x0, [sp, #0x78] - PROLOG_NOP ldp x1, x2, [sp, #0x80] - PROLOG_NOP ldp x3, x4, [sp, #0x90] - PROLOG_NOP ldp x5, x6, [sp, #0xA0] - PROLOG_NOP ldp x7, x8, [sp, #0xB0] - PROLOG_NOP ldp x9, x10, [sp, #0xC0] - PROLOG_NOP ldp x11, x12, [sp, #0xD0] - PROLOG_NOP ldp x13, x14, [sp, #0xE0] - PROLOG_NOP ldp x15, x16, [sp, #0xF0] - PROLOG_NOP ldp x17, x18, [sp, #0x100] - PROLOG_NOP ldr lr, [sp, #0x110] + PROLOG_NOP ldr x1, [sp, #0x80] ; Restore the floating return registers - IF $restoreFPRegisters - EPILOG_NOP ldp d0, d1, [sp, #0x120] - EPILOG_NOP ldp d2, d3, [sp, #0x130] - ENDIF + EPILOG_NOP ldp d0, d1, [sp, #0x90] + EPILOG_NOP ldp d2, d3, [sp, #0xA0] ;; Restore callee saved registers EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 @@ -110,7 +78,7 @@ PROBE_FRAME_SIZE field 0 EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 - EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE + $extraStackSpace)! + EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE)! 
MEND ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can @@ -119,72 +87,34 @@ PROBE_FRAME_SIZE field 0 ;; ;; $threadReg : register containing the Thread* (this will be preserved) ;; $trashReg : register that can be trashed by this macro - ;; $savedRegsMask : value to initialize m_Flags field with (register or #constant) - ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant - ;; $frameSize : total size of the method's stack frame (including probe frame size) + ;; $savedRegsMask : register containing flags MACRO - INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags, $frameSize - - LCLS BitmaskStr -BitmaskStr SETS "$savedRegsMask" + INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask str $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * - IF BitmaskStr:LEFT:1 == "#" - ;; The savedRegsMask is a constant, remove the leading "#" since the MOVL64 doesn't expect it -BitmaskStr SETS BitmaskStr:RIGHT:(:LEN:BitmaskStr - 1) - MOVL64 $trashReg, $BitmaskStr, $gcFlags - ELSE - ASSERT "$gcFlags" == "" - ;; The savedRegsMask is a register - mov $trashReg, $savedRegsMask - ENDIF - str $trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - add $trashReg, sp, #$frameSize + str $savedRegsMask, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + add $trashReg, sp, #PROBE_FRAME_SIZE str $trashReg, [sp, #m_CallersSP] MEND ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro ;; first in the method (no further prolog instructions can be added after this). ;; - ;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then - ;; the current thread will be calculated inline into r2 ($trashReg must not equal r2 in - ;; this case) + ;; $threadReg : register containing the Thread* (this will be preserved). 
;; $trashReg : register that can be trashed by this macro ;; $savedRegsMask : value to initialize m_dwFlags field with (register or #constant) - ;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant MACRO - PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags - - ; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value - ; of $threadReg. - LCLS __PPF_ThreadReg -__PPF_ThreadReg SETS "$threadReg" + PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving ; incoming register values into it. - ALLOC_PROBE_FRAME 0, {true} + ALLOC_PROBE_FRAME - ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into x2. - ; Record that x2 holds the Thread* in our local variable. - IF "$threadReg" == "" - ASSERT "$trashReg" != "x2" -__PPF_ThreadReg SETS "x2" - INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg - ENDIF ; Perform the rest of the PInvokeTransitionFrame initialization. - INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $savedRegsMask, $gcFlags, PROBE_FRAME_SIZE + INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask mov $trashReg, sp - str $trashReg, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - MEND - - ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and - ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR. 
- MACRO - EPILOG_PROBE_FRAME - - FREE_PROBE_FRAME 0, {true} - EPILOG_RETURN + str $trashReg, [$threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] MEND ;; @@ -248,23 +178,22 @@ __PPF_ThreadReg SETS "x2" HijackTargetFakeProlog LABELED_RETURN_ADDRESS RhpGcProbeHijack - FixupHijackedCallstack - orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcProbe - NESTED_END RhpGcProbeHijackWrapper - LEAF_ENTRY RhpGcProbe ldr x3, =RhpTrapThreads ldr w3, [x3] - tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, RhpGcProbeRare + tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, DoRhpGcProbe ret - LEAF_END RhpGcProbe + +DoRhpGcProbe + orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + b RhpGcProbe + NESTED_END RhpGcProbeHijackWrapper EXTERN RhpThrowHwEx - NESTED_ENTRY RhpGcProbeRare - PROLOG_PROBE_FRAME x2, x3, x12, + NESTED_ENTRY RhpGcProbe + PROLOG_PROBE_FRAME x2, x3, x12 ldr x0, [x2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] bl RhpWaitForGC2 @@ -272,14 +201,14 @@ __PPF_ThreadReg SETS "x2" ldr x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1 - EPILOG_PROBE_FRAME - + FREE_PROBE_FRAME + EPILOG_RETURN 1 - FREE_PROBE_FRAME 0, {true} + FREE_PROBE_FRAME EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT EPILOG_NOP mov x1, lr ;; return address as exception PC EPILOG_NOP b RhpThrowHwEx - NESTED_END RhpGcProbeRare + NESTED_END RhpGcProbe LEAF_ENTRY RhpGcPoll ldr x0, =RhpTrapThreads @@ -323,11 +252,12 @@ __PPF_ThreadReg SETS "x2" ;; All other registers restored as they were when the hijack was first reached. 
;; NESTED_ENTRY RhpGcStressProbe - PROLOG_PROBE_FRAME x2, x3, x12, + PROLOG_PROBE_FRAME x2, x3, x12 bl $REDHAWKGCINTERFACE__STRESSGC - EPILOG_PROBE_FRAME + FREE_PROBE_FRAME + EPILOG_RETURN NESTED_END RhpGcStressProbe NESTED_ENTRY RhpHijackForGcStress From c321a96bcdda6f4404548bf96b2b16a6b35bf992 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 00:01:17 -0700 Subject: [PATCH 11/17] PUSH_PROBE_FRAME on arm64 --- .../nativeaot/Runtime/amd64/GcProbe.asm | 10 +-- .../nativeaot/Runtime/arm64/GcProbe.asm | 81 ++++++------------- 2 files changed, 31 insertions(+), 60 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index f8cce7a9a2f1c..c06949cdc3f22 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -4,11 +4,11 @@ include AsmMacros.inc ;; -;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts the register -;; bitmask in RCX +;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts +;; the register bitmask ;; ;; On entry: -;; - BITMASK: bitmask describing pushes, may be volatile register or constant value +;; - BITMASK: bitmask describing pushes, a volatile register ;; - RAX: managed function return value, may be an object or byref ;; - preserved regs: need to stay preserved, may contain objects or byrefs ;; @@ -31,7 +31,7 @@ PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK push_vol_reg BITMASK ; save the register bitmask passed in by caller push_vol_reg threadReg ; Thread * (unused by stackwalker) push_nonvol_reg rbp ; save caller's RBP - mov trashReg, [rsp + 12*8] ; Find the return address + mov trashReg, [rsp + 12*8] ; Find the return address push_vol_reg trashReg ; save m_RIP lea trashReg, [rsp + 0] ; trashReg == address of frame @@ -41,7 +41,7 @@ PUSH_PROBE_FRAME macro threadReg, trashReg, BITMASK ;; 
save xmm0 in case it's being used as a return value movdqa [rsp + 20h], xmm0 - ; link the frame into the Thread + ;; link the frame into the Thread mov [threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], trashReg endm diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 81755f15a2434..36bed49616650 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -19,17 +19,18 @@ m_CallersSP field 8 ; SP at routine entry field 4 * 8 ; d0..d3 PROBE_FRAME_SIZE field 0 - ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very - ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the - ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and - ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME - ;; defined below. For the special cases where additional work has to be done in the prolog we also provide - ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control - ;; to be asserted. - ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro - ;; can only be called from within the prolog). + ;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers + ;; and accepts the register bitmask + ;; Call this macro first in the method (no further prolog instructions can be added after this). + ;; + ;; $threadReg : register containing the Thread* (this will be preserved). 
+ ;; $trashReg : register that can be trashed by this macro + ;; $BITMASK : register holding the value to initialize the m_Flags field with MACRO - ALLOC_PROBE_FRAME + PUSH_PROBE_FRAME $threadReg, $trashReg, $BITMASK + + ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + ; incoming register values into it. ;; First create PInvokeTransitionFrame PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE)! ;; Push down stack pointer and store FP and LR @@ -56,12 +57,21 @@ PROBE_FRAME_SIZE field 0 PROLOG_NOP stp d0, d1, [sp, #0x90] PROLOG_NOP stp d2, d3, [sp, #0xA0] + ;; Perform the rest of the PInvokeTransitionFrame initialization. + str $BITMASK, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] ; save the register bitmask passed in by caller + str $threadReg,[sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * (unused by stackwalker) + add $trashReg, sp, #PROBE_FRAME_SIZE ; recover value of caller's SP + str $trashReg, [sp, #m_CallersSP] ; save caller's SP + + ;; link the frame into the Thread + mov $trashReg, sp + str $trashReg, [$threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] MEND ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all ;; registers are restored (apart for sp and pc), even volatiles. MACRO - FREE_PROBE_FRAME + POP_PROBE_FRAME ;; Restore the integer return registers PROLOG_NOP ldr x0, [sp, #0x78] @@ -81,42 +91,6 @@ PROBE_FRAME_SIZE field 0 EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE)! MEND - ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can - ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP - ;; is invariant outside of the prolog. 
- ;; - ;; $threadReg : register containing the Thread* (this will be preserved) - ;; $trashReg : register that can be trashed by this macro - ;; $savedRegsMask : register containing flags - MACRO - INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask - - str $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread * - str $savedRegsMask, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] - add $trashReg, sp, #PROBE_FRAME_SIZE - str $trashReg, [sp, #m_CallersSP] - MEND - - ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro - ;; first in the method (no further prolog instructions can be added after this). - ;; - ;; $threadReg : register containing the Thread* (this will be preserved). - ;; $trashReg : register that can be trashed by this macro - ;; $savedRegsMask : value to initialize m_dwFlags field with (register or #constant) - MACRO - PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask - - ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving - ; incoming register values into it. - ALLOC_PROBE_FRAME - - - ; Perform the rest of the PInvokeTransitionFrame initialization. - INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask - mov $trashReg, sp - str $trashReg, [$threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] - MEND - ;; ;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and ;; clears the hijack state. 
@@ -166,11 +140,8 @@ PROBE_FRAME_SIZE field 0 MEND -;; -;; ;; ;; GC Probe Hijack target -;; ;; EXTERN RhpPInvokeExceptionGuard @@ -193,7 +164,7 @@ DoRhpGcProbe EXTERN RhpThrowHwEx NESTED_ENTRY RhpGcProbe - PROLOG_PROBE_FRAME x2, x3, x12 + PUSH_PROBE_FRAME x2, x3, x12 ldr x0, [x2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] bl RhpWaitForGC2 @@ -201,10 +172,10 @@ DoRhpGcProbe ldr x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1 - FREE_PROBE_FRAME + POP_PROBE_FRAME EPILOG_RETURN 1 - FREE_PROBE_FRAME + POP_PROBE_FRAME EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT EPILOG_NOP mov x1, lr ;; return address as exception PC EPILOG_NOP b RhpThrowHwEx @@ -252,11 +223,11 @@ DoRhpGcProbe ;; All other registers restored as they were when the hijack was first reached. ;; NESTED_ENTRY RhpGcStressProbe - PROLOG_PROBE_FRAME x2, x3, x12 + PUSH_PROBE_FRAME x2, x3, x12 bl $REDHAWKGCINTERFACE__STRESSGC - FREE_PROBE_FRAME + POP_PROBE_FRAME EPILOG_RETURN NESTED_END RhpGcStressProbe From e8eea2b8e0a3ae818523d52cc9ac2f48cda99dd2 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 11:56:51 -0700 Subject: [PATCH 12/17] couple comments --- src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm | 9 ++++++--- .../nativeaot/Runtime/unix/unixasmmacrosamd64.inc | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 36bed49616650..5a2f5abac5d5c 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -68,8 +68,11 @@ PROBE_FRAME_SIZE field 0 str $trashReg, [$threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] MEND - ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all - ;; registers are restored (apart for sp and pc), even volatiles. 
+;; +;; Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +;; registers and return value to their values from before the probe was called (while also updating any +;; object refs or byrefs). +;; MACRO POP_PROBE_FRAME @@ -209,7 +212,7 @@ DoRhpGcProbe LEAF_END RhpGcStressHijack ;; ;; Worker for our GC stress probes. Do not call directly!! -;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; Instead, go through RhpGcStressHijack. ;; This worker performs the GC Stress work and returns to the original return address. ;; ;; Register state on entry: diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 260a2ca533dc4..1aaf7c53ff615 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -320,7 +320,7 @@ C_FUNC(\Name): DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP .macro PUSH_COOP_PINVOKE_FRAME trashReg - push_nonvol_reg rbp // push RBP frame // TODO: do we need this? not on windows. 
+ push_nonvol_reg rbp // push RBP frame mov rbp, rsp lea \trashReg, [rsp + 0x10] push_register \trashReg // save caller's RSP From d30547122a69c8eae270391220d65b8e5a1cf30b Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 12:23:34 -0700 Subject: [PATCH 13/17] make RhpGcProbeHijack responsible for setting PTFF_SAVE_ bits --- src/coreclr/nativeaot/Runtime/ICodeManager.h | 18 ++++++------------ .../nativeaot/Runtime/arm64/GcProbe.asm | 4 ++-- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index b3c1e5f39d5e6..3671b6f683905 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -53,10 +53,9 @@ C_ASSERT(PTFF_X1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32)); inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) { - if (returnKind == GCRK_Scalar) - return 0; - - return PTFF_SAVE_X0 | PTFF_SAVE_X1 | ((uint64_t)returnKind << 32); + // just need to report gc ref bits here. + // appropriate PTFF_SAVE_ bits will be added by the frame building routine. + return ((uint64_t)returnKind << 32); } inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) @@ -76,14 +75,9 @@ C_ASSERT(PTFF_RDX_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 16)); inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) { - if (returnKind == GCRK_Scalar) - return 0; - -#if defined(TARGET_UNIX) - return PTFF_SAVE_RAX | PTFF_SAVE_RDX | ((uint64_t)returnKind << 16); -#else - return PTFF_SAVE_RAX | ((uint64_t)returnKind << 16); -#endif + // just need to report gc ref bits here. + // appropriate PTFF_SAVE_ bits will be added by the frame building routine. 
+ return ((uint64_t)returnKind << 16); } inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 5a2f5abac5d5c..1523a4770c879 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -160,7 +160,7 @@ PROBE_FRAME_SIZE field 0 ret DoRhpGcProbe - orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + orr x12, x12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0 + PTFF_SAVE_X1) b RhpGcProbe NESTED_END RhpGcProbeHijackWrapper @@ -207,7 +207,7 @@ DoRhpGcProbe ;; LEAF_ENTRY RhpGcStressHijack FixupHijackedCallstack - orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS + orr x12, x12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0 + PTFF_SAVE_X1) b RhpGcStressProbe LEAF_END RhpGcStressHijack ;; From bc03016091bc11a4cbcd558789c12ce0b1081c31 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 23:30:31 -0700 Subject: [PATCH 14/17] revert `RhpReversePInvokeAttachOrTrapThread2` change --- src/coreclr/nativeaot/Runtime/thread.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index c8c6acea6b33e..eacc0d2514ae3 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -1340,6 +1340,12 @@ COOP_PINVOKE_HELPER(uint64_t, RhCurrentOSThreadId, ()) return PalGetCurrentThreadIdForLogging(); } +// Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread +EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame* pFrame) +{ + ASSERT(pFrame->m_savedThread == ThreadStore::RawGetCurrentThread()); + pFrame->m_savedThread->ReversePInvokeAttachOrTrapThread(pFrame); +} // // PInvoke @@ -1352,7 +1358,7 @@ COOP_PINVOKE_HELPER(void, RhpReversePInvoke, 
(ReversePInvokeFrame * pFrame)) if (pCurThread->InlineTryFastReversePInvoke(pFrame)) return; - pCurThread->ReversePInvokeAttachOrTrapThread(pFrame); + RhpReversePInvokeAttachOrTrapThread2(pFrame); } COOP_PINVOKE_HELPER(void, RhpReversePInvokeReturn, (ReversePInvokeFrame * pFrame)) From 7b260acddbc0a2dd832d7309acf4f117ac12a35e Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 23:31:10 -0700 Subject: [PATCH 15/17] fix indentation --- src/coreclr/nativeaot/Runtime/thread.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index eacc0d2514ae3..e743d3f674a73 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -550,11 +550,11 @@ EXTERN_C void FASTCALL RhpGcStressHijack(); // static bool Thread::IsHijackTarget(void* address) { - if (&RhpGcProbeHijack == address) - return true; + if (&RhpGcProbeHijack == address) + return true; #ifdef FEATURE_GC_STRESS - if (&RhpGcStressHijack == address) - return true; + if (&RhpGcStressHijack == address) + return true; #endif // FEATURE_GC_STRESS return false; } From 38e9c17657d23b675779cc8935bf71a95b260aef Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 23:52:59 -0700 Subject: [PATCH 16/17] made 32bit RhpGcStressHijack similar to 64bit counterparts (as much as can be done without trying to compile and test) --- src/coreclr/nativeaot/Runtime/arm/GcProbe.asm | 68 ++++--------------- .../nativeaot/Runtime/i386/GcProbe.asm | 50 +++----------- 2 files changed, 23 insertions(+), 95 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm index 3f69f449a18da..230c03de976ea 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm @@ -189,38 +189,23 @@ 
__PPF_ThreadReg SETS "r2" EXTERN RhpPInvokeExceptionGuard - NESTED_ENTRY RhpGcProbeHijackScalarWrapper, .text, RhpPInvokeExceptionGuard + NESTED_ENTRY RhpGcProbeHijackWrapper, .text, RhpPInvokeExceptionGuard HijackTargetFakeProlog - LABELED_RETURN_ADDRESS RhpGcProbeHijackScalar + LABELED_RETURN_ADDRESS RhpGcProbeHijack FixupHijackedCallstack - mov r12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcProbe - NESTED_END RhpGcProbeHijackScalarWrapper - - NESTED_ENTRY RhpGcProbeHijackObjectWrapper, .text, RhpPInvokeExceptionGuard - - HijackTargetFakeProlog - - LABELED_RETURN_ADDRESS RhpGcProbeHijackObject - - FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) - b RhpGcProbe - NESTED_END RhpGcProbeHijackObjectWrapper - - NESTED_ENTRY RhpGcProbeHijackByrefWrapper, .text, RhpPInvokeExceptionGuard - HijackTargetFakeProlog - - LABELED_RETURN_ADDRESS RhpGcProbeHijackByref - - FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) + ldr r3, =RhpTrapThreads + ldr r3, [r3] + tst r3, #TrapThreadsFlags_TrapThreads + bne %0 + bx lr +0 + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0) b RhpGcProbe - NESTED_END RhpGcProbeHijackByrefWrapper + NESTED_END RhpGcProbeHijackWrapper #ifdef FEATURE_GC_STRESS ;; @@ -228,28 +213,15 @@ __PPF_ThreadReg SETS "r2" ;; GC Stress Hijack targets ;; ;; - LEAF_ENTRY RhpGcStressHijackScalar - FixupHijackedCallstack - mov r12, #DEFAULT_FRAME_SAVE_FLAGS - b RhpGcStressProbe - LEAF_END RhpGcStressHijackScalar - - LEAF_ENTRY RhpGcStressHijackObject - FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF) - b RhpGcStressProbe - LEAF_END RhpGcStressHijackObject - - LEAF_ENTRY RhpGcStressHijackByref + LEAF_ENTRY RhpGcStressHijack FixupHijackedCallstack - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF) + mov r12, #DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 b RhpGcStressProbe - LEAF_END RhpGcStressHijackByref - + LEAF_END 
RhpGcStressHijack ;; ;; Worker for our GC stress probes. Do not call directly!! -;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. +;; Instead, go through RhpGcStressHijack. ;; This worker performs the GC Stress work and returns to the original return address. ;; ;; Register state on entry: @@ -273,17 +245,7 @@ __PPF_ThreadReg SETS "r2" EXTERN RhpThrowHwEx - LEAF_ENTRY RhpGcProbe - ldr r3, =RhpTrapThreads - ldr r3, [r3] - tst r3, #TrapThreadsFlags_TrapThreads - bne %0 - bx lr -0 - b RhpGcProbeRare - LEAF_END RhpGcProbe - - NESTED_ENTRY RhpGcProbeRare + NESTED_ENTRY RhpGcProbe PROLOG_PROBE_FRAME r2, r3, r12 ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm index 578e39d59f340..27759ddb004b1 100644 --- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm @@ -154,12 +154,6 @@ extern RhpThrowHwEx : proc ;; All registers restored as they were when the hijack was first reached. 
;; RhpGcProbe proc - test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz SynchronousRendezVous - - HijackFixupEpilog - -SynchronousRendezVous: PushProbeFrame ecx ; bitmask in ECX mov ecx, esp @@ -236,51 +230,23 @@ RhpGcStressProbe endp endif ;; FEATURE_GC_STRESS -FASTCALL_FUNC RhpGcProbeHijackScalar, 0 - +FASTCALL_FUNC RhpGcProbeHijack, 0 HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS - jmp RhpGcProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcProbeHijackObject, 0 - - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcProbeHijackByref, 0 + test [RhpTrapThreads], TrapThreadsFlags_TrapThreads + jnz DoRhpGcProbe + HijackFixupEpilog - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF +DoRhpGcProbe: + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcProbe FASTCALL_ENDFUNC ifdef FEATURE_GC_STRESS -FASTCALL_FUNC RhpGcStressHijackScalar, 0 - - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS - jmp RhpGcStressProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcStressHijackObject, 0 - - HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF - jmp RhpGcStressProbe - -FASTCALL_ENDFUNC - -FASTCALL_FUNC RhpGcStressHijackByref, 0 +FASTCALL_FUNC RhpGcStressHijack, 0 HijackFixupProlog - mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF + mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcStressProbe FASTCALL_ENDFUNC From f16bb2981a1e62ecc87dabf728f8d828ea23ac40 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Thu, 21 Jul 2022 23:59:37 -0700 Subject: [PATCH 17/17] Renamed `RhpGcProbe` --> `RhpWaitForGC` --- src/coreclr/nativeaot/Runtime/amd64/GcProbe.S | 10 +++++----- src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm | 6 +++--- src/coreclr/nativeaot/Runtime/arm/GcProbe.asm | 6 +++--- 
src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm | 10 +++++----- src/coreclr/nativeaot/Runtime/i386/GcProbe.asm | 10 +++++----- src/coreclr/nativeaot/Runtime/thread.cpp | 2 +- 6 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index 6db688d869911..39dcceb5234f3 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -113,15 +113,15 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler FixupHijackedCallstack test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads - jnz LOCAL_LABEL(DoRhpGcProbe) + jnz LOCAL_LABEL(WaitForGC) ret -LOCAL_LABEL(DoRhpGcProbe): +LOCAL_LABEL(WaitForGC): or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RDX - jmp C_FUNC(RhpGcProbe) + jmp C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack, _TEXT -NESTED_ENTRY RhpGcProbe, _TEXT, NoHandler +NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler PUSH_PROBE_FRAME r11, rax, rcx END_PROLOGUE @@ -140,7 +140,7 @@ LOCAL_LABEL(Abort): pop rdx // return address as exception RIP jmp C_FUNC(RhpThrowHwEx) // Throw the ThreadAbortException as a special kind of hardware exception -NESTED_END RhpGcProbe, _TEXT +NESTED_END RhpWaitForGC, _TEXT LEAF_ENTRY RhpGcPoll, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index c06949cdc3f22..c01ada624f190 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -111,12 +111,12 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, RhpPInvokeExceptionGuard ret @@: or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX - jmp RhpGcProbe + jmp RhpWaitForGC NESTED_END RhpGcProbeHijack, _TEXT EXTERN RhpThrowHwEx : PROC -NESTED_ENTRY RhpGcProbe, _TEXT +NESTED_ENTRY RhpWaitForGC, _TEXT PUSH_PROBE_FRAME rdx, rax, rcx END_PROLOGUE @@ -135,7 +135,7 @@ Abort: pop rdx ;; return address as exception RIP jmp 
RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception -NESTED_END RhpGcProbe, _TEXT +NESTED_END RhpWaitForGC, _TEXT LEAF_ENTRY RhpGcPoll, _TEXT cmp [RhpTrapThreads], TrapThreadsFlags_None diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm index 230c03de976ea..64268c10bba92 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.asm @@ -204,7 +204,7 @@ __PPF_ThreadReg SETS "r2" bx lr 0 mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0) - b RhpGcProbe + b RhpWaitForGC NESTED_END RhpGcProbeHijackWrapper #ifdef FEATURE_GC_STRESS @@ -245,7 +245,7 @@ __PPF_ThreadReg SETS "r2" EXTERN RhpThrowHwEx - NESTED_ENTRY RhpGcProbe + NESTED_ENTRY RhpWaitForGC PROLOG_PROBE_FRAME r2, r3, r12 ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] @@ -263,7 +263,7 @@ __PPF_ThreadReg SETS "r2" EPILOG_NOP mov r1, lr ;; return address as exception PC EPILOG_BRANCH RhpThrowHwEx - NESTED_END RhpGcProbe + NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll ldr r0, =RhpTrapThreads diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 1523a4770c879..e5c2f5a4eebe7 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -156,17 +156,17 @@ PROBE_FRAME_SIZE field 0 ldr x3, =RhpTrapThreads ldr w3, [x3] - tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, DoRhpGcProbe + tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, WaitForGC ret -DoRhpGcProbe +WaitForGC orr x12, x12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0 + PTFF_SAVE_X1) - b RhpGcProbe + b RhpWaitForGC NESTED_END RhpGcProbeHijackWrapper EXTERN RhpThrowHwEx - NESTED_ENTRY RhpGcProbe + NESTED_ENTRY RhpWaitForGC PUSH_PROBE_FRAME x2, x3, x12 ldr x0, [x2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] @@ -182,7 +182,7 @@ DoRhpGcProbe EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT EPILOG_NOP mov x1, 
lr ;; return address as exception PC EPILOG_NOP b RhpThrowHwEx - NESTED_END RhpGcProbe + NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll ldr x0, =RhpTrapThreads diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm index 27759ddb004b1..22d7cda1ef285 100644 --- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm @@ -153,7 +153,7 @@ extern RhpThrowHwEx : proc ;; Register state on exit: ;; All registers restored as they were when the hijack was first reached. ;; -RhpGcProbe proc +RhpWaitForGC proc PushProbeFrame ecx ; bitmask in ECX mov ecx, esp @@ -177,7 +177,7 @@ Abort: pop edx ;; return address as exception RIP jmp RhpThrowHwEx -RhpGcProbe endp +RhpWaitForGC endp ifdef FEATURE_GC_STRESS ;; @@ -233,12 +233,12 @@ endif ;; FEATURE_GC_STRESS FASTCALL_FUNC RhpGcProbeHijack, 0 HijackFixupProlog test [RhpTrapThreads], TrapThreadsFlags_TrapThreads - jnz DoRhpGcProbe + jnz WaitForGC HijackFixupEpilog -DoRhpGcProbe: +WaitForGC: mov ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX - jmp RhpGcProbe + jmp RhpWaitForGC FASTCALL_ENDFUNC diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index e743d3f674a73..9256517277b2c 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -571,7 +571,7 @@ void Thread::Hijack() } #if defined(TARGET_ARM64) && defined(TARGET_UNIX) - // TODO: RhpGcProbe and related asm helpers NYI for ARM64/UNIX. + // TODO: RhpGcProbeHijack and related asm helpers NYI for ARM64/UNIX. // disabling hijacking for now. return; #endif