Przeglądaj źródła

hal, x86: use a new trap handling method

Similar to what we've done on the riscv64 and loongarch64 platforms, we
adapt captured traps to x86. x86 has a really limited number of
registers, so we reserve 2 words in the percpu area as scratch space and
save some of the registers there if needed.

When trap_return is called, instead of using `TaskContext::switch`, we
handwrite the context switch code so we can save a lot of boilerplate.
We push the current flags register onto our current stack, load all
registers, and save the current stack pointer in %rax's slot for later
use by trap_entry. Nothing else is really different here.

For trap_entry, non-captured traps and captured userspace traps behave
almost the same in the new method as they did before, except that we
make some optimizations on the saved registers and so on. What really
makes a difference is captured kernel-space traps. x86 pushes the
interrupt frame onto the current stack unconditionally, and this is
cumbersome! We need to load the actual TrapContext from the percpu area,
move the trap frame there, and proceed with our handler. In fact, this
accounts for 66.7% of the percpu words needed, or 100% of the scratch
words needed, so it would be great to find some way to get rid of it.

Signed-off-by: greatbridf <greatbridf@icloud.com>
greatbridf 1 tydzień temu
rodzic
commit
697295347a

+ 3 - 1
crates/eonix_hal/src/arch/x86_64/interrupt.rs

@@ -4,6 +4,8 @@ use core::pin::Pin;
 use core::ptr::NonNull;
 
 use crate::arch::cpu::rdmsr;
+use crate::arch::trap::trap_stubs;
+use crate::symbol_addr;
 
 #[repr(C)]
 #[derive(Clone, Copy)]
@@ -113,7 +115,7 @@ impl InterruptControl {
     /// # Return
     /// Returns a tuple of InterruptControl and the cpu id of the current cpu.
     pub fn new() -> (Self, usize) {
-        let trap_stubs_base = super::trap::trap_stubs_start as usize;
+        let trap_stubs_base = symbol_addr!(trap_stubs);
 
         let idt = core::array::from_fn(|idx| match idx {
             0..0x80 => IDTEntry::new(trap_stubs_base + 8 * idx, 0x08, 0x8e),

+ 4 - 0
crates/eonix_hal/src/arch/x86_64/link.x

@@ -91,6 +91,10 @@ SECTIONS {
         __spercpu = .;
 
         QUAD(0); /* Reserved for x86 percpu pointer */
+        QUAD(0); /* Reserved for x86 percpu capturer trap context */
+
+        QUAD(0); /* Reserved for x86 percpu capturer scratch */
+        QUAD(0); /* Reserved for x86 percpu capturer scratch */
 
         . = ALIGN(16);
 

+ 446 - 213
crates/eonix_hal/src/arch/x86_64/trap/mod.rs

@@ -3,55 +3,26 @@ mod trap_context;
 use core::arch::{asm, global_asm, naked_asm};
 
 use eonix_hal_traits::context::RawTaskContext;
-use eonix_hal_traits::trap::{IrqState as IrqStateTrait, TrapReturn};
+use eonix_hal_traits::trap::{
+    IrqState as IrqStateTrait, RawTrapContext, TrapReturn,
+};
 pub use trap_context::TrapContext;
 
 use super::context::TaskContext;
+use super::cpu::CPU;
 
 unsafe extern "C" {
+    /// Default handler handles the trap on the current stack and returns
+    /// to the context before the interrupt.
     fn _default_trap_handler(trap_context: &mut TrapContext);
-    pub fn trap_stubs_start();
-    fn _raw_trap_return();
 }
 
-#[eonix_percpu::define_percpu]
-static TRAP_HANDLER: unsafe extern "C" fn() = default_trap_handler;
-
-#[eonix_percpu::define_percpu]
-static CAPTURER_CONTEXT: TaskContext = TaskContext::new();
-
-/// This value will never be used.
-static mut DIRTY_TRAP_CONTEXT: TaskContext = TaskContext::new();
-
 /// State of the interrupt flag.
 pub struct IrqState(u64);
 
-global_asm!(
-    r"
-    .set RAX, 0x00
-    .set RBX, 0x08
-    .set RCX, 0x10
-    .set RDX, 0x18
-    .set RDI, 0x20
-    .set RSI, 0x28
-    .set R8, 0x30
-    .set R9, 0x38
-    .set R10, 0x40
-    .set R11, 0x48
-    .set R12, 0x50
-    .set R13, 0x58
-    .set R14, 0x60
-    .set R15, 0x68
-    .set RBP, 0x70
-    .set INT_NO, 0x78
-    .set ERRCODE, 0x80
-    .set RIP, 0x88
-    .set CS, 0x90
-    .set FLAGS, 0x98
-    .set RSP, 0xa0
-    .set SS, 0xa8
-
-    .macro cfi_all_same_value
+macro_rules! cfi_all_same_value {
+    () => {
+        "
         .cfi_same_value %rax
         .cfi_same_value %rbx
         .cfi_same_value %rcx
@@ -67,44 +38,50 @@ global_asm!(
         .cfi_same_value %r14
         .cfi_same_value %r15
         .cfi_same_value %rbp
-    .endm
+        "
+    };
+}
 
-    .globl {trap_stubs_start}
-    {trap_stubs_start}:
+#[unsafe(naked)]
+pub unsafe extern "C" fn trap_stubs() {
+    naked_asm!(
+        "
         .altmacro
         .macro build_isr_no_err name
             .align 8
-            .globl ISR\name
-            .type  ISR\name @function
-            ISR\name:
+            .globl ISR\\name
+            .type  ISR\\name @function
+            ISR\\name:
                 .cfi_startproc
                 .cfi_signal_frame
                 .cfi_def_cfa_offset 0x08
                 .cfi_offset %rsp, 0x10
-
-                cfi_all_same_value
+            ",
+                cfi_all_same_value!(),
+            "
 
                 push %rbp # push placeholder for error code
                 .cfi_def_cfa_offset 0x10
 
-                call _raw_trap_entry
+                call {entry}
                 .cfi_endproc
         .endm
 
         .altmacro
         .macro build_isr_err name
             .align 8
-            .globl ISR\name
-            .type  ISR\name @function
-            ISR\name:
+            .globl ISR\\name
+            .type  ISR\\name @function
+            ISR\\name:
                 .cfi_startproc
                 .cfi_signal_frame
                 .cfi_def_cfa_offset 0x10
                 .cfi_offset %rsp, 0x10
+            ",
+                cfi_all_same_value!(),
+            "
 
-                cfi_all_same_value
-
-                call _raw_trap_entry
+                call {entry}
                 .cfi_endproc
         .endm
 
@@ -146,185 +123,442 @@ global_asm!(
             build_isr_no_err %i
             .set i, i+1
         .endr
+        ",
+        entry = sym raw_trap_entry,
+        options(att_syntax),
+    )
+}
+
+/// Offset of the capturer trap context in the percpu area.
+const OFFSET_CAPTURER: usize = 8;
+
+#[unsafe(naked)]
+unsafe extern "C" fn raw_trap_entry() {
+    naked_asm!(
+        ".cfi_startproc",
+        ".cfi_signal_frame",
+        ".cfi_def_cfa %rsp, 0x18",
+        ".cfi_offset %rsp, 0x10",
+        cfi_all_same_value!(),
+        "",
+        "cmpq $0x08, 0x18(%rsp)",
+        "je 2f",
+        "swapgs",
+        "",
+        "2:",
+        "subq ${trap_stubs}, (%rsp)",
+        "shrq $3, (%rsp)",
+        "",
+        "cmpq $0, %gs:{offset_capturer}",
+        "je {default_entry}",
+        "",
+        "cmpq $0x08, 0x18(%rsp)",
+        "je {captured_kernel_entry}",
+        "jmp {captured_user_entry}",
+        ".cfi_endproc",
+        trap_stubs = sym trap_stubs,
+        default_entry = sym default_trap_entry,
+        captured_kernel_entry = sym captured_trap_entry_kernel,
+        captured_user_entry = sym captured_trap_entry_user,
+        offset_capturer = const OFFSET_CAPTURER,
+        options(att_syntax),
+    )
+}
 
-    .globl _raw_trap_entry
-    .type  _raw_trap_entry @function
-    _raw_trap_entry:
-        .cfi_startproc
-        .cfi_signal_frame
-        .cfi_def_cfa %rsp, 0x18
-        .cfi_offset %rsp, 0x10
-
-        cfi_all_same_value
-
-        sub $0x78, %rsp
-        .cfi_def_cfa_offset CS
-
-        mov %rax, RAX(%rsp)
-        .cfi_rel_offset %rax, RAX
-        mov %rbx, RBX(%rsp)
-        .cfi_rel_offset %rbx, RBX
-        mov %rcx, RCX(%rsp)
-        .cfi_rel_offset %rcx, RCX
-        mov %rdx, RDX(%rsp)
-        .cfi_rel_offset %rdx, RDX
-        mov %rdi, RDI(%rsp)
-        .cfi_rel_offset %rdi, RDI
-        mov %rsi, RSI(%rsp)
-        .cfi_rel_offset %rsi, RSI
-        mov %r8, R8(%rsp)
-        .cfi_rel_offset %r8, R8
-        mov %r9, R9(%rsp)
-        .cfi_rel_offset %r9, R9
-        mov %r10, R10(%rsp)
-        .cfi_rel_offset %r10, R10
-        mov %r11, R11(%rsp)
-        .cfi_rel_offset %r11, R11
-        mov %r12, R12(%rsp)
-        .cfi_rel_offset %r12, R12
-        mov %r13, R13(%rsp)
-        .cfi_rel_offset %r13, R13
-        mov %r14, R14(%rsp)
-        .cfi_rel_offset %r14, R14
-        mov %r15, R15(%rsp)
-        .cfi_rel_offset %r15, R15
-        mov %rbp, RBP(%rsp)
-        .cfi_rel_offset %rbp, RBP
-
-        mov INT_NO(%rsp), %rcx
-        sub ${trap_stubs_start}, %rcx
-        shr $3, %rcx
-        mov %rcx, INT_NO(%rsp)
-
-        cmpq $0x08, CS(%rsp)
-        je 2f
-        swapgs
-
-        2:
-        mov %gs:0, %rcx
-        add ${handler}, %rcx
-        mov (%rcx), %rcx
-
-        jmp *%rcx
-        .cfi_endproc
-
-    _raw_trap_return:
-        .cfi_startproc
-        .cfi_def_cfa %rsp, CS
-        .cfi_rel_offset %rax, RAX
-        .cfi_rel_offset %rbx, RBX
-        .cfi_rel_offset %rcx, RCX
-        .cfi_rel_offset %rdx, RDX
-        .cfi_rel_offset %rdi, RDI
-        .cfi_rel_offset %rsi, RSI
-        .cfi_rel_offset %r8, R8
-        .cfi_rel_offset %r9, R9
-        .cfi_rel_offset %r10, R10
-        .cfi_rel_offset %r11, R11
-        .cfi_rel_offset %r12, R12
-        .cfi_rel_offset %r13, R13
-        .cfi_rel_offset %r14, R14
-        .cfi_rel_offset %r15, R15
-        .cfi_rel_offset %rbp, RBP
-        .cfi_rel_offset %rsp, RSP
-
-        mov RAX(%rsp), %rax
-        .cfi_restore %rax
-        mov RBX(%rsp), %rbx
-        .cfi_restore %rbx
-        mov RCX(%rsp), %rcx
-        .cfi_restore %rcx
-        mov RDX(%rsp), %rdx
-        .cfi_restore %rdx
-        mov RDI(%rsp), %rdi
-        .cfi_restore %rdi
-        mov RSI(%rsp), %rsi
-        .cfi_restore %rsi
-        mov R8(%rsp), %r8
-        .cfi_restore %r8
-        mov R9(%rsp), %r9
-        .cfi_restore %r9
-        mov R10(%rsp), %r10
-        .cfi_restore %r10
-        mov R11(%rsp), %r11
-        .cfi_restore %r11
-        mov R12(%rsp), %r12
-        .cfi_restore %r12
-        mov R13(%rsp), %r13
-        .cfi_restore %r13
-        mov R14(%rsp), %r14
-        .cfi_restore %r14
-        mov R15(%rsp), %r15
-        .cfi_restore %r15
-        mov RBP(%rsp), %rbp
-        .cfi_restore %rbp
-
-        cmpq $0x08, CS(%rsp)
-        je 2f
-        swapgs
-
-        2:
-        lea RIP(%rsp), %rsp
-        .cfi_def_cfa %rsp, 0x08
-        .cfi_offset %rsp, 0x10
-
-        iretq
-        .cfi_endproc
-    ",
-    trap_stubs_start = sym trap_stubs_start,
-    handler = sym _percpu_inner_TRAP_HANDLER,
-    options(att_syntax),
-);
-
-/// Default handler handles the trap on the current stack and returns
-/// to the context before interrut.
 #[unsafe(naked)]
-unsafe extern "C" fn default_trap_handler() {
+unsafe extern "C" fn default_trap_entry() {
     naked_asm!(
         ".cfi_startproc",
+        ".cfi_signal_frame",
+        ".cfi_def_cfa %rsp, 0x18",
+        ".cfi_offset %rsp, 0x10",
+        cfi_all_same_value!(),
+        "",
+        "sub ${INT_NO}, %rsp",
+        ".cfi_def_cfa_offset {CS}",
+        "",
+        "mov %rcx, {RCX}(%rsp)",
+        ".cfi_rel_offset %rcx, {RCX}",
+        "mov %rdx, {RDX}(%rsp)",
+        ".cfi_rel_offset %rdx, {RDX}",
+        "mov %rdi, {RDI}(%rsp)",
+        ".cfi_rel_offset %rdi, {RDI}",
+        "mov %rsi, {RSI}(%rsp)",
+        ".cfi_rel_offset %rsi, {RSI}",
+        "mov %r8, {R8}(%rsp)",
+        ".cfi_rel_offset %r8, {R8}",
+        "mov %r9, {R9}(%rsp)",
+        ".cfi_rel_offset %r9, {R9}",
+        "mov %r10, {R10}(%rsp)",
+        ".cfi_rel_offset %r10, {R10}",
+        "mov %r11, {R11}(%rsp)",
+        ".cfi_rel_offset %r11, {R11}",
+        "mov %rbx, {RBX}(%rsp)",
+        ".cfi_rel_offset %rbx, {RBX}",
+        "mov %r12, {R12}(%rsp)",
+        ".cfi_rel_offset %r12, {R12}",
+        "",
+        "mov %rax, %r12",
+        ".cfi_register %rax, %r12",
         "mov %rsp, %rbx",
         ".cfi_def_cfa_register %rbx",
         "",
-        "and $~0xf, %rsp",
-        "",
+        "and $-0x10, %rsp",
         "mov %rbx, %rdi",
-        "call {handle_trap}",
+        "",
+        "call {default_entry}",
         "",
         "mov %rbx, %rsp",
         ".cfi_def_cfa_register %rsp",
+        "mov %r12, %rax",
+        ".cfi_restore %rax",
+        "",
+        "mov {RCX}(%rsp), %rcx",
+        ".cfi_restore %rcx",
+        "mov {RDX}(%rsp), %rdx",
+        ".cfi_restore %rdx",
+        "mov {RDI}(%rsp), %rdi",
+        ".cfi_restore %rdi",
+        "mov {RSI}(%rsp), %rsi",
+        ".cfi_restore %rsi",
+        "mov {R8}(%rsp), %r8",
+        ".cfi_restore %r8",
+        "mov {R9}(%rsp), %r9",
+        ".cfi_restore %r9",
+        "mov {R10}(%rsp), %r10",
+        ".cfi_restore %r10",
+        "mov {R11}(%rsp), %r11",
+        ".cfi_restore %r11",
+        "mov {RBX}(%rsp), %rbx",
+        ".cfi_restore %rbx",
+        "mov {R12}(%rsp), %r12",
+        ".cfi_restore %r12",
+        "",
+        "cmpq $0x08, {CS}(%rsp)",
+        "je 2f",
+        "swapgs",
+        "",
+        "2:",
+        "lea {RIP}(%rsp), %rsp",
+        ".cfi_def_cfa %rsp, 0x08",
+        ".cfi_offset %rsp, 0x10",
         "",
-        "jmp {trap_return}",
+        "iretq",
         ".cfi_endproc",
-        handle_trap = sym _default_trap_handler,
-        trap_return = sym _raw_trap_return,
+        default_entry = sym _default_trap_handler,
+        RBX = const TrapContext::OFFSET_RBX,
+        RCX = const TrapContext::OFFSET_RCX,
+        RDX = const TrapContext::OFFSET_RDX,
+        RDI = const TrapContext::OFFSET_RDI,
+        RSI = const TrapContext::OFFSET_RSI,
+        R8 = const TrapContext::OFFSET_R8,
+        R9 = const TrapContext::OFFSET_R9,
+        R10 = const TrapContext::OFFSET_R10,
+        R11 = const TrapContext::OFFSET_R11,
+        R12 = const TrapContext::OFFSET_R12,
+        INT_NO = const TrapContext::OFFSET_INT_NO,
+        RIP = const TrapContext::OFFSET_RIP,
+        CS = const TrapContext::OFFSET_CS,
         options(att_syntax),
-    );
+    )
+}
+
+#[unsafe(naked)]
+unsafe extern "C" fn captured_trap_entry_kernel() {
+    naked_asm!(
+        ".cfi_startproc",
+        ".cfi_signal_frame",
+        ".cfi_def_cfa %rsp, 0x18",
+        ".cfi_offset %rsp, 0x10",
+        cfi_all_same_value!(),
+        "",
+        "mov %rsi, %gs:0x10",
+        ".cfi_undefined %rsi",
+        "mov %rsp, %rsi",
+        ".cfi_def_cfa_register %rsi",
+        ".cfi_register %rsp, %rsi",
+
+        "mov %gs:0x08, %rsp",
+        // Save and load registers.
+        "mov %rcx, {RCX}(%rsp)",
+        ".cfi_rel_offset %rcx, {RCX}",
+        "",
+        "mov %gs:0x10, %rcx",
+        ".cfi_register %rsi, %rcx",
+        "",
+        "mov %rdx, {RDX}(%rsp)",
+        ".cfi_rel_offset %rdx, {RDX}",
+        "mov %rdi, {RDI}(%rsp)",
+        ".cfi_rel_offset %rdi, {RDI}",
+        "mov %rcx, {RSI}(%rsp)",
+        ".cfi_rel_offset %rsi, {RSI}",
+        "mov %r8, {R8}(%rsp)",
+        ".cfi_rel_offset %r8, {R8}",
+        "mov %r9, {R9}(%rsp)",
+        ".cfi_rel_offset %r9, {R9}",
+        "mov %r10, {R10}(%rsp)",
+        ".cfi_rel_offset %r10, {R10}",
+        "mov %r11, {R11}(%rsp)",
+        ".cfi_rel_offset %r11, {R11}",
+        "",
+        "xchg %rax, {RAX}(%rsp)",
+        ".cfi_rel_offset %rax, {RAX}",
+        "xchg %rbx, {RBX}(%rsp)",
+        ".cfi_rel_offset %rbx, {RBX}",
+        "xchg %r12, {R12}(%rsp)",
+        ".cfi_rel_offset %r12, {R12}",
+        "xchg %r13, {R13}(%rsp)",
+        ".cfi_rel_offset %r13, {R13}",
+        "xchg %r14, {R14}(%rsp)",
+        ".cfi_rel_offset %r14, {R14}",
+        "xchg %r15, {R15}(%rsp)",
+        ".cfi_rel_offset %r15, {R15}",
+        "xchg %rbp, {RBP}(%rsp)",
+        ".cfi_rel_offset %rbp, {RBP}",
+        "",
+        "lea {INT_NO}(%rsp), %rdi",
+        "mov $7, %rcx",
+        "cld",
+        "rep movsq",
+        "",
+        "mov %rax, %rsp",
+        ".cfi_def_cfa %rsp, 0x10",
+        ".cfi_undefined %rax",
+        ".cfi_restore %rbx",
+        ".cfi_undefined %rcx",
+        ".cfi_undefined %rdx",
+        ".cfi_undefined %rdi",
+        ".cfi_undefined %rsi",
+        ".cfi_undefined %r8",
+        ".cfi_undefined %r9",
+        ".cfi_undefined %r10",
+        ".cfi_undefined %r11",
+        ".cfi_restore %r12",
+        ".cfi_restore %r13",
+        ".cfi_restore %r14",
+        ".cfi_restore %r15",
+        ".cfi_restore %rbp",
+        "",
+        "popf",
+        ".cfi_def_cfa_offset 0x08",
+        "",
+        "ret",
+        ".cfi_endproc",
+        RAX = const TrapContext::OFFSET_RAX,
+        RBX = const TrapContext::OFFSET_RBX,
+        RCX = const TrapContext::OFFSET_RCX,
+        RDX = const TrapContext::OFFSET_RDX,
+        RDI = const TrapContext::OFFSET_RDI,
+        RSI = const TrapContext::OFFSET_RSI,
+        R8 = const TrapContext::OFFSET_R8,
+        R9 = const TrapContext::OFFSET_R9,
+        R10 = const TrapContext::OFFSET_R10,
+        R11 = const TrapContext::OFFSET_R11,
+        R12 = const TrapContext::OFFSET_R12,
+        R13 = const TrapContext::OFFSET_R13,
+        R14 = const TrapContext::OFFSET_R14,
+        R15 = const TrapContext::OFFSET_R15,
+        RBP = const TrapContext::OFFSET_RBP,
+        INT_NO = const TrapContext::OFFSET_INT_NO,
+        options(att_syntax),
+    )
 }
 
 #[unsafe(naked)]
-unsafe extern "C" fn captured_trap_handler() {
+unsafe extern "C" fn captured_trap_entry_user() {
     naked_asm!(
-        "mov ${from_context}, %rdi",
-        "mov %gs:0, %rsi",
-        "add ${to_context}, %rsi",
+        ".cfi_startproc",
+        ".cfi_signal_frame",
+        ".cfi_def_cfa %rsp, 0x18",
+        ".cfi_offset %rsp, 0x10",
+        cfi_all_same_value!(),
+        "",
+        "sub ${INT_NO}, %rsp",
+        ".cfi_def_cfa_offset {CS}",
+        "",
+        // Save and load registers.
+        "mov %rcx, {RCX}(%rsp)",
+        ".cfi_rel_offset %rcx, {RCX}",
+        "mov %rdx, {RDX}(%rsp)",
+        ".cfi_rel_offset %rdx, {RDX}",
+        "mov %rdi, {RDI}(%rsp)",
+        ".cfi_rel_offset %rdi, {RDI}",
+        "mov %rsi, {RSI}(%rsp)",
+        ".cfi_rel_offset %rsi, {RSI}",
+        "mov %r8,  {R8}(%rsp)",
+        ".cfi_rel_offset %r8, {R8}",
+        "mov %r9,  {R9}(%rsp)",
+        ".cfi_rel_offset %r9, {R9}",
+        "mov %r10, {R10}(%rsp)",
+        ".cfi_rel_offset %r10, {R10}",
+        "mov %r11, {R11}(%rsp)",
+        ".cfi_rel_offset %r11, {R11}",
+        "",
+        "xchg %rax, {RAX}(%rsp)",
+        ".cfi_rel_offset %rax, {RAX}",
+        "xchg %rbx, {RBX}(%rsp)",
+        ".cfi_rel_offset %rbx, {RBX}",
+        "xchg %r12, {R12}(%rsp)",
+        ".cfi_rel_offset %r12, {R12}",
+        "xchg %r13, {R13}(%rsp)",
+        ".cfi_rel_offset %r13, {R13}",
+        "xchg %r14, {R14}(%rsp)",
+        ".cfi_rel_offset %r14, {R14}",
+        "xchg %r15, {R15}(%rsp)",
+        ".cfi_rel_offset %r15, {R15}",
+        "xchg %rbp, {RBP}(%rsp)",
+        ".cfi_rel_offset %rbp, {RBP}",
+        "",
+        "mov %rax, %rsp",
+        ".cfi_def_cfa %rsp, 0x10",
+        ".cfi_undefined %rax",
+        ".cfi_restore %rbx",
+        ".cfi_undefined %rcx",
+        ".cfi_undefined %rdx",
+        ".cfi_undefined %rdi",
+        ".cfi_undefined %rsi",
+        ".cfi_undefined %r8",
+        ".cfi_undefined %r9",
+        ".cfi_undefined %r10",
+        ".cfi_undefined %r11",
+        ".cfi_restore %r12",
+        ".cfi_restore %r13",
+        ".cfi_restore %r14",
+        ".cfi_restore %r15",
+        ".cfi_restore %rbp",
         "",
-        "mov %rdi, %rsp", // We need a temporary stack to use `switch()`.
+        "popf",
+        ".cfi_def_cfa_offset 0x08",
         "",
-        "jmp {switch}",
-        from_context = sym DIRTY_TRAP_CONTEXT,
-        to_context = sym _percpu_inner_CAPTURER_CONTEXT,
-        switch = sym TaskContext::switch,
+        "ret",
+        ".cfi_endproc",
+        RAX = const TrapContext::OFFSET_RAX,
+        RBX = const TrapContext::OFFSET_RBX,
+        RCX = const TrapContext::OFFSET_RCX,
+        RDX = const TrapContext::OFFSET_RDX,
+        RDI = const TrapContext::OFFSET_RDI,
+        RSI = const TrapContext::OFFSET_RSI,
+        R8 = const TrapContext::OFFSET_R8,
+        R9 = const TrapContext::OFFSET_R9,
+        R10 = const TrapContext::OFFSET_R10,
+        R11 = const TrapContext::OFFSET_R11,
+        R12 = const TrapContext::OFFSET_R12,
+        R13 = const TrapContext::OFFSET_R13,
+        R14 = const TrapContext::OFFSET_R14,
+        R15 = const TrapContext::OFFSET_R15,
+        RBP = const TrapContext::OFFSET_RBP,
+        INT_NO = const TrapContext::OFFSET_INT_NO,
+        CS = const TrapContext::OFFSET_CS,
         options(att_syntax),
-    );
+    )
 }
 
 #[unsafe(naked)]
-unsafe extern "C" fn captured_trap_return(trap_context: usize) -> ! {
+unsafe extern "C" fn captured_trap_return(trap_context: &mut TrapContext) {
     naked_asm!(
-        "jmp {trap_return}",
-        trap_return = sym _raw_trap_return,
+        ".cfi_startproc",
+        ".cfi_signal_frame",
+        ".cfi_def_cfa %rsp, 0x08",
+        "",
+        "pushf",
+        ".cfi_def_cfa_offset 0x10",
+        "",
+        "mov %rdi, %rax",
+        ".cfi_def_cfa %rax, {CS}",
+        ".cfi_rel_offset %rcx, {RCX}",
+        ".cfi_rel_offset %rdx, {RDX}",
+        ".cfi_rel_offset %rdi, {RDI}",
+        ".cfi_rel_offset %rsi, {RSI}",
+        ".cfi_rel_offset %r8, {R8}",
+        ".cfi_rel_offset %r9, {R9}",
+        ".cfi_rel_offset %r10, {R10}",
+        ".cfi_rel_offset %r11, {R11}",
+        ".cfi_rel_offset %rax, {RAX}",
+        ".cfi_rel_offset %rbx, {RBX}",
+        ".cfi_rel_offset %r12, {R12}",
+        ".cfi_rel_offset %r13, {R13}",
+        ".cfi_rel_offset %r14, {R14}",
+        ".cfi_rel_offset %r15, {R15}",
+        ".cfi_rel_offset %rbp, {RBP}",
+        ".cfi_rel_offset %rflags, {FLAGS}",
+        ".cfi_rel_offset %rsp, {RSP}",
+        "",
+        "mov {RCX}(%rax), %rcx",
+        ".cfi_restore %rcx",
+        "mov {RDX}(%rax), %rdx",
+        ".cfi_restore %rdx",
+        "mov {RDI}(%rax), %rdi",
+        ".cfi_restore %rdi",
+        "mov {RSI}(%rax), %rsi",
+        ".cfi_restore %rsi",
+        "mov  {R8}(%rax), %r8",
+        ".cfi_restore %r8",
+        "mov  {R9}(%rax), %r9",
+        ".cfi_restore %r9",
+        "mov {R10}(%rax), %r10",
+        ".cfi_restore %r10",
+        "mov {R11}(%rax), %r11",
+        ".cfi_restore %r11",
+        "",
+        "xchg %rax, %rsp",
+        "xchg %rax, {RAX}(%rsp)",
+        ".cfi_restore %rax",
+        "xchg %rbx, {RBX}(%rsp)",
+        ".cfi_restore %rbx",
+        "xchg %r12, {R12}(%rsp)",
+        ".cfi_restore %r12",
+        "xchg %r13, {R13}(%rsp)",
+        ".cfi_restore %r13",
+        "xchg %r14, {R14}(%rsp)",
+        ".cfi_restore %r14",
+        "xchg %r15, {R15}(%rsp)",
+        ".cfi_restore %r15",
+        "xchg %rbp, {RBP}(%rsp)",
+        ".cfi_restore %rbp",
+        "",
+        "cmpq $0x08, {CS}(%rsp)",
+        "je 2f",
+        "swapgs",
+        "",
+        "2:",
+        "lea {RIP}(%rsp), %rsp",
+        ".cfi_def_cfa %rsp, 0x08",
+        "iretq",
+        ".cfi_endproc",
+        RAX = const TrapContext::OFFSET_RAX,
+        RBX = const TrapContext::OFFSET_RBX,
+        RCX = const TrapContext::OFFSET_RCX,
+        RDX = const TrapContext::OFFSET_RDX,
+        RDI = const TrapContext::OFFSET_RDI,
+        RSI = const TrapContext::OFFSET_RSI,
+        R8 = const TrapContext::OFFSET_R8,
+        R9 = const TrapContext::OFFSET_R9,
+        R10 = const TrapContext::OFFSET_R10,
+        R11 = const TrapContext::OFFSET_R11,
+        R12 = const TrapContext::OFFSET_R12,
+        R13 = const TrapContext::OFFSET_R13,
+        R14 = const TrapContext::OFFSET_R14,
+        R15 = const TrapContext::OFFSET_R15,
+        RBP = const TrapContext::OFFSET_RBP,
+        RIP = const TrapContext::OFFSET_RIP,
+        CS = const TrapContext::OFFSET_CS,
+        FLAGS = const TrapContext::OFFSET_FLAGS,
+        RSP = const TrapContext::OFFSET_RSP,
+        options(att_syntax),
+    );
+}
+
+unsafe fn swap_percpu_capturer(new_capturer: usize) -> usize {
+    let old_capturer: usize;
+    asm!(
+        "mov %gs:0x08, {old}",
+        "mov {new}, %gs:0x08",
+        new = in(reg) new_capturer,
+        old = out(reg) old_capturer,
         options(att_syntax),
     );
+
+    old_capturer
 }
 
 impl TrapReturn for TrapContext {
@@ -332,18 +566,17 @@ impl TrapReturn for TrapContext {
 
     unsafe fn trap_return(&mut self) {
         let irq_states = disable_irqs_save();
-        let old_handler = TRAP_HANDLER.swap(captured_trap_handler);
-
-        let mut to_ctx = TaskContext::new();
-        to_ctx.set_program_counter(captured_trap_return as _);
-        to_ctx.set_stack_pointer(&raw mut *self as usize);
-        to_ctx.set_interrupt_enabled(false);
+        let old_handler = swap_percpu_capturer(self as *mut _ as usize);
 
         unsafe {
-            TaskContext::switch(CAPTURER_CONTEXT.as_mut(), &mut to_ctx);
+            CPU::local()
+                .as_mut()
+                .load_interrupt_stack(self as *mut _ as usize as u64);
         }
 
-        TRAP_HANDLER.set(old_handler);
+        captured_trap_return(self);
+
+        swap_percpu_capturer(old_handler);
         irq_states.restore();
     }
 }

+ 31 - 3
crates/eonix_hal/src/arch/x86_64/trap/trap_context.rs

@@ -1,4 +1,5 @@
 use core::arch::asm;
+use core::mem::offset_of;
 
 use eonix_hal_traits::fault::{Fault, PageFaultErrorCode};
 use eonix_hal_traits::trap::{RawTrapContext, TrapType};
@@ -6,11 +7,9 @@ use eonix_mm::address::VAddr;
 
 use crate::processor::CPU;
 
-#[derive(Clone, Copy, Default)]
 #[repr(C, align(16))]
+#[derive(Clone, Copy, Default)]
 pub struct TrapContext {
-    rax: u64,
-    rbx: u64,
     rcx: u64,
     rdx: u64,
     rdi: u64,
@@ -19,11 +18,17 @@ pub struct TrapContext {
     r9: u64,
     r10: u64,
     r11: u64,
+
+    /// on `trap_return`: save capturer %rsp, load %rax
+    /// on `trap_entry`: save %rax, load capturer %rsp
+    rax: u64,
+    rbx: u64,
     r12: u64,
     r13: u64,
     r14: u64,
     r15: u64,
     rbp: u64,
+
     int_no: u64,
     errcode: u64,
     rip: u64,
@@ -34,6 +39,29 @@ pub struct TrapContext {
 }
 
 impl TrapContext {
+    pub const OFFSET_RAX: usize = offset_of!(TrapContext, rax);
+    pub const OFFSET_RBX: usize = offset_of!(TrapContext, rbx);
+    pub const OFFSET_RCX: usize = offset_of!(TrapContext, rcx);
+    pub const OFFSET_RDX: usize = offset_of!(TrapContext, rdx);
+    pub const OFFSET_RDI: usize = offset_of!(TrapContext, rdi);
+    pub const OFFSET_RSI: usize = offset_of!(TrapContext, rsi);
+    pub const OFFSET_R8: usize = offset_of!(TrapContext, r8);
+    pub const OFFSET_R9: usize = offset_of!(TrapContext, r9);
+    pub const OFFSET_R10: usize = offset_of!(TrapContext, r10);
+    pub const OFFSET_R11: usize = offset_of!(TrapContext, r11);
+    pub const OFFSET_R12: usize = offset_of!(TrapContext, r12);
+    pub const OFFSET_R13: usize = offset_of!(TrapContext, r13);
+    pub const OFFSET_R14: usize = offset_of!(TrapContext, r14);
+    pub const OFFSET_R15: usize = offset_of!(TrapContext, r15);
+    pub const OFFSET_RBP: usize = offset_of!(TrapContext, rbp);
+    pub const OFFSET_INT_NO: usize = offset_of!(TrapContext, int_no);
+    pub const OFFSET_ERRCODE: usize = offset_of!(TrapContext, errcode);
+    pub const OFFSET_RIP: usize = offset_of!(TrapContext, rip);
+    pub const OFFSET_CS: usize = offset_of!(TrapContext, cs);
+    pub const OFFSET_FLAGS: usize = offset_of!(TrapContext, flags);
+    pub const OFFSET_RSP: usize = offset_of!(TrapContext, rsp);
+    pub const OFFSET_SS: usize = offset_of!(TrapContext, ss);
+
     fn get_fault_type(&self) -> Fault {
         match self.int_no {
             6 | 8 => Fault::InvalidOp,