3 Commits 07cdd43e60 ... d161134335

Author SHA1 Message Date
  greatbridf d161134335 chore: add tmux debug script in Makefile 3 weeks ago
  greatbridf 13f7fff46d Merge branch 'multiarch' 3 weeks ago
  shao f048367b02 refactor: refactor better abstraction for context switch 3 weeks ago
52 files changed, 1402 additions and 1218 deletions
   1. +3   -15   Cargo.lock
   2. +9   -0    Makefile.src
   3. +8   -5    arch/Cargo.lock
   4. +2   -2    arch/Cargo.toml
   5. +7   -0    arch/percpu-macros/Cargo.lock
   6. +2   -2    arch/percpu-macros/Cargo.toml
   7. +1   -2    arch/percpu-macros/src/arch.rs
   8. +0   -0    arch/percpu-macros/src/lib.rs
   9. +0   -53   arch/percpu/Cargo.lock
  10. +0   -8    arch/percpu/Cargo.toml
  11. +0   -25   arch/percpu/src/arch.rs
  12. +0   -6    arch/percpu/src/lib.rs
  13. +10  -94   arch/src/lib.rs
  14. +71  -0    arch/src/x86_64/context.rs
  15. +1   -2    arch/src/x86_64/gdt.rs
  16. +241 -0    arch/src/x86_64/init.rs
  17. +237 -0    arch/src/x86_64/interrupt.rs
  18. +214 -0    arch/src/x86_64/interrupt.s
  19. +0   -0    arch/src/x86_64/io.rs
  20. +134 -0    arch/src/x86_64/mod.rs
  21. +16  -0    arch/src/x86_64/percpu.rs
  22. +54  -0    arch/src/x86_64/user.rs
  23. +0   -6    arch/x86_64/Cargo.toml
  24. +0   -27   arch/x86_64/src/interrupt.rs
  25. +0   -70   arch/x86_64/src/lib.rs
  26. +0   -172  arch/x86_64/src/task.rs
  27. +1   -1    global_find.sh
  28. +0   -74   src/boot.s
  29. +2   -2    src/driver.rs
  30. +8   -0    src/elf.rs
  31. +1   -1    src/kernel.rs
  32. +0   -5    src/kernel/arch.rs
  33. +0   -82   src/kernel/arch/x86_64.rs
  34. +0   -126  src/kernel/arch/x86_64/init.rs
  35. +0   -129  src/kernel/arch/x86_64/interrupt.rs
  36. +30  -0    src/kernel/cpu.rs
  37. +11  -5    src/kernel/interrupt.rs
  38. +8   -8    src/kernel/mem/mm_list/page_fault.rs
  39. +6   -6    src/kernel/mem/page_table.rs
  40. +59  -0    src/kernel/mem/paging.rs
  41. +58  -5    src/kernel/smp.rs
  42. +0   -32   src/kernel/smp/percpu.rs
  43. +25  -22   src/kernel/syscall.rs
  44. +21  -64   src/kernel/syscall/procops.rs
  45. +2   -2    src/kernel/task.rs
  46. +17  -94   src/kernel/task/kstack.rs
  47. +14  -10   src/kernel/task/scheduler.rs
  48. +16  -14   src/kernel/task/signal.rs
  49. +84  -23   src/kernel/task/thread.rs
  50. +2   -2    src/kernel/timer.rs
  51. +25  -22   src/lib.rs
  52. +2   -0    src/sync/semaphore.rs

+ 3 - 15
Cargo.lock

@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "aho-corasick"
@@ -15,8 +15,8 @@ dependencies = [
 name = "arch"
 version = "0.1.0"
 dependencies = [
- "percpu",
- "x86_64",
+ "cfg-if",
+ "percpu-macros",
 ]
 
 [[package]]
@@ -173,14 +173,6 @@ dependencies = [
  "minimal-lexical",
 ]
 
-[[package]]
-name = "percpu"
-version = "0.1.0"
-dependencies = [
- "percpu-macros",
- "x86_64",
-]
-
 [[package]]
 name = "percpu-macros"
 version = "0.1.0"
@@ -354,7 +346,3 @@ name = "windows_x86_64_msvc"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
-
-[[package]]
-name = "x86_64"
-version = "0.1.0"

+ 9 - 0
Makefile.src

@@ -51,6 +51,15 @@ debug:
 		-iex 'target remote:1234'
 	-killall $(QEMU_BIN)
 
+.PHONY: tmux-debug
+tmux-debug:
+	tmux new-session -s gbos-debug -d
+	-tmux split-window -t gbos-debug -hf
+	-tmux send-keys -t gbos-debug:1.1 'make srun' C-m
+	-tmux send-keys -t gbos-debug:1.2 'make debug' C-m C-m
+	-tmux attach -t gbos-debug
+	tmux kill-session -t gbos-debug
+
 build/boot.vdi: build/boot.img
 	-rm build/boot.vdi
 	VBoxManage convertfromraw $< $@ --format VDI
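
Note: the new tmux-debug target opens a detached tmux session with two panes,
running `make srun` (QEMU) in one and `make debug` (GDB attaching to :1234) in
the other, then attaches to it and kills the session once it ends. The leading
`-` on the intermediate commands tells make to keep going even if one of them
fails, so the final kill-session always runs.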

+ 8 - 5
arch/Cargo.lock

@@ -6,10 +6,16 @@ version = 3
 name = "arch"
 version = "0.1.0"
 dependencies = [
+ "cfg-if",
  "percpu",
- "x86_64",
 ]
 
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
 [[package]]
 name = "percpu"
 version = "0.1.0"
@@ -21,6 +27,7 @@ dependencies = [
 name = "percpu-macros"
 version = "0.1.0"
 dependencies = [
+ "proc-macro2",
  "quote",
  "syn",
 ]
@@ -59,7 +66,3 @@ name = "unicode-ident"
 version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
-
-[[package]]
-name = "x86_64"
-version = "0.1.0"

+ 2 - 2
arch/Cargo.toml

@@ -4,5 +4,5 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-x86_64 = { path="./x86_64" }
-percpu = { path="./percpu" }
+percpu-macros = { path="./percpu-macros" }
+cfg-if = "1.0"

+ 7 - 0
arch/percpu/macros/Cargo.lock → arch/percpu-macros/Cargo.lock

@@ -2,10 +2,17 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
 [[package]]
 name = "percpu-macros"
 version = "0.1.0"
 dependencies = [
+ "cfg-if",
  "proc-macro2",
  "quote",
  "syn",

+ 2 - 2
arch/percpu/macros/Cargo.toml → arch/percpu-macros/Cargo.toml

@@ -7,6 +7,6 @@ edition = "2021"
 proc-macro = true
 
 [dependencies]
-syn = { version = "2.0", features = ["full"] }
-quote = "1.0"
 proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "2.0", features = ["full"] }

+ 1 - 2
arch/percpu/macros/src/arch.rs → arch/percpu-macros/src/arch.rs

@@ -5,8 +5,7 @@ use syn::{Ident, Type};
 /// Get the base address for percpu variables of the current thread.
 pub fn get_percpu_pointer(percpu: &Ident, ty: &Type) -> TokenStream {
     quote! {
-        #[cfg(target_arch = "x86_64")]
-        {
+        #[cfg(target_arch = "x86_64")] {
             let base: *mut #ty;
             ::core::arch::asm!(
                 "mov %gs:0, {address}",

+ 0 - 0
arch/percpu/macros/src/lib.rs → arch/percpu-macros/src/lib.rs


+ 0 - 53
arch/percpu/Cargo.lock

@@ -1,53 +0,0 @@
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 3
-
-[[package]]
-name = "percpu"
-version = "0.1.0"
-dependencies = [
- "percpu-macros",
-]
-
-[[package]]
-name = "percpu-macros"
-version = "0.1.0"
-dependencies = [
- "quote",
- "syn",
-]
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.92"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.37"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.89"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"

+ 0 - 8
arch/percpu/Cargo.toml

@@ -1,8 +0,0 @@
-[package]
-name = "percpu"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-percpu-macros = { path = "macros" }
-x86_64 = { path = "../x86_64" }

+ 0 - 25
arch/percpu/src/arch.rs

@@ -1,25 +0,0 @@
-pub unsafe fn save_percpu_pointer(percpu_area_base: *mut ()) {
-    #[cfg(target_arch = "x86_64")]
-    x86_64::task::wrmsr(0xC0000101, percpu_area_base as u64);
-
-    #[cfg(not(target_arch = "x86_64"))]
-    compile_error!("unsupported architecture");
-}
-
-pub unsafe fn set_percpu_area_thiscpu(percpu_area_base: *mut ()) {
-    use core::arch::asm;
-
-    save_percpu_pointer(percpu_area_base);
-
-    #[cfg(target_arch = "x86_64")]
-    {
-        asm!(
-            "movq {}, %gs:0",
-            in(reg) percpu_area_base,
-            options(att_syntax)
-        );
-    }
-
-    #[cfg(not(target_arch = "x86_64"))]
-    compile_error!("unsupported architecture");
-}

+ 0 - 6
arch/percpu/src/lib.rs

@@ -1,6 +0,0 @@
-#![no_std]
-
-mod arch;
-
-pub use arch::set_percpu_area_thiscpu;
-pub use percpu_macros::define_percpu;

+ 10 - 94
arch/src/lib.rs

@@ -1,98 +1,14 @@
 #![no_std]
+#![feature(naked_functions)]
 
-pub mod vm {
-    pub fn invlpg(vaddr: usize) {
-        x86_64::vm::invlpg(vaddr)
+cfg_if::cfg_if! {
+    if #[cfg(target_arch = "x86_64")] {
+        mod x86_64;
+        pub use self::x86_64::*;
+    } else if #[cfg(target_arch = "riscv64")] {
+        mod riscv;
+        pub use self::riscv::*;
+    } else if #[cfg(target_arch = "aarch64")] {
+        // TODO!!!
     }
-
-    pub fn invlpg_all() {
-        x86_64::vm::invlpg_all()
-    }
-
-    pub fn current_page_table() -> usize {
-        x86_64::vm::get_cr3()
-    }
-
-    pub fn switch_page_table(pfn: usize) {
-        x86_64::vm::set_cr3(pfn)
-    }
-}
-
-pub mod task {
-    #[inline(always)]
-    pub fn halt() {
-        x86_64::task::halt()
-    }
-
-    #[inline(always)]
-    pub fn pause() {
-        x86_64::task::pause()
-    }
-
-    #[inline(always)]
-    pub fn freeze() -> ! {
-        x86_64::task::freeze()
-    }
-
-    /// Switch to the `next` task. `IF` state is also switched.
-    ///
-    /// This function should only be used to switch between tasks that do not need SMP synchronization.
-    ///
-    /// # Arguments
-    /// * `current_task_sp` - Pointer to the stack pointer of the current task.
-    /// * `next_task_sp` - Pointer to the stack pointer of the next task.
-    #[inline(always)]
-    pub fn context_switch_light(current_task_sp: *mut usize, next_task_sp: *mut usize) {
-        x86_64::task::context_switch_light(current_task_sp, next_task_sp);
-    }
-
-    #[cfg(target_arch = "x86_64")]
-    pub use x86_64::task::{rdmsr, wrmsr};
 }
-
-pub mod interrupt {
-    #[inline(always)]
-    pub fn enable() {
-        x86_64::interrupt::enable()
-    }
-
-    #[inline(always)]
-    pub fn disable() {
-        x86_64::interrupt::disable()
-    }
-}
-
-pub mod io {
-    #[inline(always)]
-    pub fn inb(port: u16) -> u8 {
-        x86_64::io::inb(port)
-    }
-
-    #[inline(always)]
-    pub fn outb(port: u16, data: u8) {
-        x86_64::io::outb(port, data)
-    }
-
-    #[inline(always)]
-    pub fn inw(port: u16) -> u16 {
-        x86_64::io::inw(port)
-    }
-
-    #[inline(always)]
-    pub fn outw(port: u16, data: u16) {
-        x86_64::io::outw(port, data)
-    }
-
-    #[inline(always)]
-    pub fn inl(port: u16) -> u32 {
-        x86_64::io::inl(port)
-    }
-
-    #[inline(always)]
-    pub fn outl(port: u16, data: u32) {
-        x86_64::io::outl(port, data)
-    }
-}
-
-pub use percpu::{define_percpu, set_percpu_area_thiscpu};
-pub use x86_64;
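
Note: with the x86_64 and percpu sub-crates folded into `arch` itself, the
nested wrapper modules (arch::vm, arch::task, arch::io, arch::interrupt) are
gone and the selected architecture's items are re-exported flat. This is why
call sites later in this diff change from `arch::vm::invlpg(..)` to
`arch::flush_tlb(..)` and from `arch::io::inb(..)` to `arch::inb(..)`.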

+ 71 - 0
arch/src/x86_64/context.rs

@@ -0,0 +1,71 @@
+use core::arch::asm;
+
+#[repr(C)]
+#[derive(Debug, Default)]
+struct ContextSwitchFrame {
+    r15: u64,
+    r14: u64,
+    r13: u64,
+    r12: u64,
+    rbx: u64,
+    rbp: u64,
+    eflags: u64,
+    rip: u64,
+}
+
+/// Necessary hardware states of task for context switch
+pub struct TaskContext {
+    /// The kernel stack pointer
+    pub rsp: u64,
+    // Extended states, i.e., FP/SIMD states to do!
+}
+
+impl TaskContext {
+    pub const fn new() -> Self {
+        Self { rsp: 0 }
+    }
+
+    pub fn init(&mut self, entry: usize, kstack_top: usize) {
+        unsafe {
+            let frame_ptr = (kstack_top as *mut ContextSwitchFrame).sub(1);
+            core::ptr::write(
+                frame_ptr,
+                ContextSwitchFrame {
+                    rip: entry as u64,
+                    eflags: 0x200,
+                    ..Default::default()
+                },
+            );
+            self.rsp = frame_ptr as u64;
+        }
+    }
+
+    #[inline(always)]
+    pub fn switch_to(&mut self, next_task: &mut Self) {
+        unsafe { _switch_to(&mut self.rsp, &mut next_task.rsp) }
+    }
+}
+
+#[naked]
+unsafe extern "C" fn _switch_to(current_context_sp: &mut u64, next_context_sp: &mut u64) {
+    asm!(
+        "pushf",
+        "push %rbp",
+        "push %rbx",
+        "push %r12",
+        "push %r13",
+        "push %r14",
+        "push %r15",
+        "mov %rsp, (%rdi)",
+        "mov (%rsi), %rsp",
+        "pop %r15",
+        "pop %r14",
+        "pop %r13",
+        "pop %r12",
+        "pop %rbx",
+        "pop %rbp",
+        "popf",
+        "ret",
+        options(att_syntax, noreturn),
+    );
+}
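
Note: the frame layout mirrors _switch_to exactly: the six register pops,
`popf`, and the final `ret` consume one ContextSwitchFrame, so `init` seeds a
fresh kernel stack with a frame whose `rip` is the entry point and whose
`eflags` is 0x200 (IF set, so interrupts are enabled on first entry). A hedged
usage sketch (`thread_entry` and `kstack_top` are placeholders, not names from
the commit):

    // Illustrative spawning flow under the new API. `kstack_top` is the
    // top of a freshly allocated kernel stack.
    fn first_switch(kstack_top: usize) {
        extern "C" fn thread_entry() {
            // The new task starts here, with interrupts enabled.
        }

        let mut current = TaskContext::new(); // rsp filled in by switch_to
        let mut next = TaskContext::new();
        next.init(thread_entry as usize, kstack_top);

        // Saves our callee-saved registers and rflags, stores rsp into
        // `current`, loads `next.rsp`, pops the seeded frame, and rets
        // into `thread_entry`.
        current.switch_to(&mut next);
    }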

+ 1 - 2
arch/x86_64/src/gdt.rs → arch/src/x86_64/gdt.rs

@@ -1,7 +1,6 @@
+use crate::TSS;
 use core::arch::asm;
 
-use crate::task::TSS;
-
 #[repr(transparent)]
 #[derive(Debug, Clone, Copy)]
 pub struct GDTEntry(u64);

+ 241 - 0
arch/src/x86_64/init.rs

@@ -0,0 +1,241 @@
+use core::{
+    alloc::Layout,
+    pin::Pin,
+    ptr::{addr_of, NonNull},
+};
+
+use super::{enable_sse, percpu::init_percpu_area_thiscpu, GDTEntry, InterruptControl, GDT};
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+#[allow(non_camel_case_types)]
+struct TSS_SP {
+    low: u32,
+    high: u32,
+}
+
+#[repr(C)]
+pub struct TSS {
+    _reserved1: u32,
+    rsp: [TSS_SP; 3],
+    _reserved2: u32,
+    _reserved3: u32,
+    ist: [TSS_SP; 7],
+    _reserved4: u32,
+    _reserved5: u32,
+    _reserved6: u16,
+    iomap_base: u16,
+}
+
+impl TSS {
+    pub fn new() -> Self {
+        Self {
+            _reserved1: 0,
+            rsp: [TSS_SP { low: 0, high: 0 }; 3],
+            _reserved2: 0,
+            _reserved3: 0,
+            ist: [TSS_SP { low: 0, high: 0 }; 7],
+            _reserved4: 0,
+            _reserved5: 0,
+            _reserved6: 0,
+            iomap_base: 0,
+        }
+    }
+
+    pub fn set_rsp0(&mut self, rsp: u64) {
+        self.rsp[0].low = rsp as u32;
+        self.rsp[0].high = (rsp >> 32) as u32;
+    }
+}
+
+/// Architecture-specific per-cpu status.
+#[allow(dead_code)]
+pub struct CPUStatus {
+    id: usize,
+    gdt: GDT,
+    tss: TSS,
+
+    percpu_area: NonNull<u8>,
+    pub interrupt: InterruptControl,
+}
+
+impl CPUStatus {
+    pub unsafe fn new_thiscpu<F>(allocate: F) -> Self
+    where
+        F: FnOnce(Layout) -> NonNull<u8>,
+    {
+        const PAGE_SIZE: usize = 0x1000;
+        extern "C" {
+            static PERCPU_PAGES: usize;
+            fn _PERCPU_DATA_START();
+        }
+
+        let percpu_area = allocate(Layout::from_size_align_unchecked(
+            PERCPU_PAGES * PAGE_SIZE,
+            PAGE_SIZE,
+        ));
+
+        percpu_area.copy_from_nonoverlapping(
+            NonNull::new(_PERCPU_DATA_START as *mut u8).unwrap(),
+            PERCPU_PAGES * PAGE_SIZE,
+        );
+
+        let (interrupt_control, cpuid) = InterruptControl::new();
+
+        init_percpu_area_thiscpu(percpu_area);
+        Self {
+            id: cpuid,
+            gdt: GDT::new(),
+            tss: TSS::new(),
+            percpu_area,
+            interrupt: interrupt_control,
+        }
+    }
+
+    /// Load GDT and TSS in place.
+    ///
+    /// # Safety
+    /// Make sure preemption and interrupt are disabled before calling this function.
+    pub unsafe fn init(self: Pin<&mut Self>) {
+        enable_sse();
+
+        // SAFETY: We don't move the object.
+        let self_mut = self.get_unchecked_mut();
+
+        let tss_addr = addr_of!(self_mut.tss);
+        self_mut.gdt.set_tss(tss_addr as u64);
+        self_mut.gdt.load();
+
+        // SAFETY: `self` is pinned, so are its fields.
+        Pin::new_unchecked(&mut self_mut.interrupt).setup_idt();
+        self_mut.interrupt.setup_timer();
+    }
+
+    /// Bootstrap all CPUs.
+    /// This should only be called on the BSP.
+    pub unsafe fn bootstrap_cpus(&self) {
+        self.interrupt.send_sipi();
+    }
+
+    pub unsafe fn set_rsp0(&mut self, rsp: u64) {
+        self.tss.set_rsp0(rsp);
+    }
+
+    pub unsafe fn set_tls32(&mut self, desc: GDTEntry) {
+        self.gdt.set_tls32(desc);
+    }
+
+    pub fn cpuid(&self) -> usize {
+        self.id
+    }
+}
+
+#[macro_export]
+macro_rules! define_smp_bootstrap {
+    ($cpu_count:literal, $ap_entry:ident, $alloc_kstack:tt) => {
+        #[no_mangle]
+        static BOOT_SEMAPHORE: core::sync::atomic::AtomicU64 =
+            core::sync::atomic::AtomicU64::new(0);
+        #[no_mangle]
+        static BOOT_STACK: core::sync::atomic::AtomicU64 =
+            core::sync::atomic::AtomicU64::new(0);
+
+        #[no_mangle]
+        static CPU_COUNT: core::sync::atomic::AtomicU64 =
+            core::sync::atomic::AtomicU64::new(0);
+
+        core::arch::global_asm!(
+            r#"
+        .pushsection .stage1.smp
+        .code16
+        .globl ap_bootstrap
+        .type ap_bootstrap, @function
+        ap_bootstrap:
+            ljmp $0x0, $.Lap1
+
+        .Lap1:
+            # we use the shared gdt for cpu bootstrapping
+            lgdt .Lshared_gdt_desc
+
+            # set msr
+            mov $0xc0000080, %ecx
+            rdmsr
+            or $0x901, %eax # set LME, NXE, SCE
+            wrmsr
+
+            # set cr4
+            mov %cr4, %eax
+            or $0xa0, %eax # set PAE, PGE
+            mov %eax, %cr4
+
+            # load new page table
+            mov ${KERNEL_PML4}, %eax
+            mov %eax, %cr3
+
+            mov %cr0, %eax
+            // SET PE, WP, PG
+            or $0x80010001, %eax
+            mov %eax, %cr0
+
+            ljmp $0x08, $.Lap_bootstrap_end
+
+        .align 16
+        .Lshared_gdt_desc:
+            .8byte 0x0000000000005f
+
+        .code64
+        .Lap_bootstrap_end:
+            mov $0x10, %ax
+            mov %ax, %ds
+            mov %ax, %es
+            mov %ax, %ss
+
+            xor %rsp, %rsp
+            xor %rax, %rax
+            inc %rax
+        1:
+            xchg %rax, {BOOT_SEMAPHORE}
+            cmp $0, %rax
+            je 1f
+            pause
+            jmp 1b
+
+        1:
+            mov {BOOT_STACK}, %rsp # Acquire
+            cmp $0, %rsp
+            jne 1f
+            pause
+            jmp 1b
+
+        1:
+            xor %rax, %rax
+            mov %rax, {BOOT_STACK} # Release
+            xchg %rax, {BOOT_SEMAPHORE}
+
+            lock incq {CPU_COUNT}
+
+            xor %rbp, %rbp
+            push %rbp # NULL return address
+            jmp {AP_ENTRY}
+            .popsection
+            "#,
+            KERNEL_PML4 = const 0x2000,
+            BOOT_SEMAPHORE = sym BOOT_SEMAPHORE,
+            BOOT_STACK = sym BOOT_STACK,
+            CPU_COUNT = sym CPU_COUNT,
+            AP_ENTRY = sym $ap_entry,
+            options(att_syntax),
+        );
+
+        pub unsafe fn wait_cpus_online() {
+            use core::sync::atomic::Ordering;
+            while CPU_COUNT.load(Ordering::Acquire) != $cpu_count - 1 {
+                if BOOT_STACK.load(Ordering::Acquire) == 0 {
+                    let stack_bottom = $alloc_kstack as u64;
+                    BOOT_STACK.store(stack_bottom, Ordering::Release);
+                }
+                $crate::pause();
+            }
+        }
+    };
+}
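
Note: the boot protocol in the asm above, restated as pseudo-Rust for
readability (illustrative only: an AP cannot run Rust at this point because it
has no stack yet; the statics are the ones the macro defines):

    fn ap_handshake() {
        use core::sync::atomic::Ordering;

        // Take the boot lock: xchg with 1 until the old value was 0.
        while BOOT_SEMAPHORE.swap(1, Ordering::AcqRel) != 0 {
            core::hint::spin_loop();
        }
        // Wait for the BSP (in wait_cpus_online) to publish a stack.
        let stack = loop {
            match BOOT_STACK.load(Ordering::Acquire) {
                0 => core::hint::spin_loop(),
                sp => break sp,
            }
        };
        BOOT_STACK.store(0, Ordering::Release); // consume the stack
        BOOT_SEMAPHORE.store(0, Ordering::Release); // drop the boot lock
        CPU_COUNT.fetch_add(1, Ordering::SeqCst); // lock incq CPU_COUNT
        // ...switch to `stack`, push a NULL return address, jump to ap_entry.
        let _ = stack;
    }

Because the BSP only publishes a new BOOT_STACK after the previous AP has
zeroed it, a single static hands out stacks to any number of APs, one at a
time.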

+ 237 - 0
arch/src/x86_64/interrupt.rs

@@ -0,0 +1,237 @@
+use core::{arch::asm, pin::Pin, ptr::NonNull};
+
+use crate::rdmsr;
+
+use super::pause;
+
+/// Saved registers when a trap (interrupt or exception) occurs.
+#[allow(missing_docs)]
+#[repr(C)]
+#[derive(Debug, Default, Clone, Copy)]
+pub struct InterruptContext {
+    pub rax: u64,
+    pub rbx: u64,
+    pub rcx: u64,
+    pub rdx: u64,
+    pub rdi: u64,
+    pub rsi: u64,
+    pub r8: u64,
+    pub r9: u64,
+    pub r10: u64,
+    pub r11: u64,
+    pub r12: u64,
+    pub r13: u64,
+    pub r14: u64,
+    pub r15: u64,
+    pub rbp: u64,
+
+    pub int_no: u64,
+    pub error_code: u64,
+
+    // Pushed by CPU
+    pub rip: u64,
+    pub cs: u64,
+    pub eflags: u64,
+    pub rsp: u64,
+    pub ss: u64,
+}
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct IDTEntry {
+    offset_low: u16,
+    selector: u16,
+
+    interrupt_stack: u8,
+    attributes: u8,
+
+    offset_mid: u16,
+    offset_high: u32,
+    reserved: u32,
+}
+
+pub struct APICReg(*mut u32);
+pub struct APICRegs {
+    base: NonNull<u32>,
+}
+
+/// Architecture-specific interrupt control block.
+pub struct InterruptControl {
+    idt: [IDTEntry; 256],
+    apic_base: APICRegs,
+}
+
+impl IDTEntry {
+    const fn new(offset: usize, selector: u16, attributes: u8) -> Self {
+        Self {
+            offset_low: offset as u16,
+            selector,
+            interrupt_stack: 0,
+            attributes,
+            offset_mid: (offset >> 16) as u16,
+            offset_high: (offset >> 32) as u32,
+            reserved: 0,
+        }
+    }
+
+    const fn null() -> Self {
+        Self {
+            offset_low: 0,
+            selector: 0,
+            interrupt_stack: 0,
+            attributes: 0,
+            offset_mid: 0,
+            offset_high: 0,
+            reserved: 0,
+        }
+    }
+}
+
+impl APICReg {
+    fn new(pointer: *mut u32) -> Self {
+        Self(pointer)
+    }
+
+    pub fn read(&self) -> u32 {
+        unsafe { self.0.read_volatile() }
+    }
+
+    pub fn write(&self, value: u32) {
+        unsafe { self.0.write_volatile(value) }
+    }
+}
+
+impl APICRegs {
+    pub fn local_apic_id(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x20).as_ptr()) }
+    }
+
+    pub fn task_priority(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x80).as_ptr()) }
+    }
+
+    pub fn end_of_interrupt(&self) {
+        unsafe { APICReg::new(self.base.byte_offset(0xb0).as_ptr()).write(0) }
+    }
+
+    pub fn spurious(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0xf0).as_ptr()) }
+    }
+
+    pub fn interrupt_command(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x300).as_ptr()) }
+    }
+
+    pub fn timer_register(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x320).as_ptr()) }
+    }
+
+    pub fn timer_initial_count(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x380).as_ptr()) }
+    }
+
+    pub fn timer_current_count(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x390).as_ptr()) }
+    }
+
+    pub fn timer_divide(&self) -> APICReg {
+        unsafe { APICReg::new(self.base.byte_offset(0x3e0).as_ptr()) }
+    }
+}
+
+impl InterruptControl {
+    /// # Return
+    /// Returns a tuple of InterruptControl and the cpu id of the current cpu.
+    pub unsafe fn new() -> (Self, usize) {
+        extern "C" {
+            static ISR_START_ADDR: usize;
+        }
+
+        let idt = core::array::from_fn(|idx| match idx {
+            0..0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0x8e),
+            0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0xee),
+            _ => IDTEntry::null(),
+        });
+
+        let apic_base = {
+            let apic_base = rdmsr(0x1b);
+            assert_eq!(apic_base & 0x800, 0x800, "LAPIC not enabled");
+
+            let apic_base = ((apic_base & !0xfff) + 0xffffff00_00000000) as *mut u32;
+            APICRegs {
+                // TODO: A better way to convert to physical address
+                base: NonNull::new(apic_base).expect("Invalid APIC base"),
+            }
+        };
+
+        // Make sure APIC is enabled.
+        apic_base.spurious().write(0x1ff);
+
+        let cpuid = apic_base.local_apic_id().read() >> 24;
+
+        (Self { idt, apic_base }, cpuid as usize)
+    }
+
+    pub fn setup_timer(&self) {
+        self.apic_base.task_priority().write(0);
+        self.apic_base.timer_divide().write(0x3); // Divide by 16
+        self.apic_base.timer_register().write(0x20040);
+
+        // TODO: Get the bus frequency from...?
+        let freq = 200;
+        let count = freq * 1_000_000 / 16 / 100;
+        self.apic_base.timer_initial_count().write(count as u32);
+    }
+
+    pub fn setup_idt(self: Pin<&mut Self>) {
+        lidt(
+            self.idt.as_ptr() as usize,
+            (size_of::<IDTEntry>() * 256 - 1) as u16,
+        );
+    }
+
+    pub fn send_sipi(&self) {
+        let icr = self.apic_base.interrupt_command();
+
+        icr.write(0xc4500);
+        while icr.read() & 0x1000 != 0 {
+            pause();
+        }
+
+        icr.write(0xc4601);
+        while icr.read() & 0x1000 != 0 {
+            pause();
+        }
+    }
+
+    /// Send EOI to APIC so that it can send more interrupts.
+    pub fn end_of_interrupt(&self) {
+        self.apic_base.end_of_interrupt()
+    }
+}
+
+pub fn enable_irqs() {
+    unsafe {
+        asm!("sti");
+    }
+}
+
+pub fn disable_irqs() {
+    unsafe {
+        asm!("cli");
+    }
+}
+
+fn lidt(base: usize, limit: u16) {
+    let mut idt_descriptor = [0u16; 5];
+
+    idt_descriptor[0] = limit;
+    idt_descriptor[1] = base as u16;
+    idt_descriptor[2] = (base >> 16) as u16;
+    idt_descriptor[3] = (base >> 32) as u16;
+    idt_descriptor[4] = (base >> 48) as u16;
+
+    unsafe {
+        asm!("lidt ({})", in(reg) &idt_descriptor, options(att_syntax));
+    }
+}
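
Note: two constants above deserve decoding. Gate attribute 0x8e is a present
64-bit interrupt gate with DPL 0; vector 0x80 instead gets 0xee, the same gate
with DPL 3, so user mode may raise it with `int $0x80`:

    // How the two attribute bytes used in InterruptControl::new decompose
    // (const names here are illustrative, not from the commit):
    const PRESENT: u8 = 1 << 7;    // P bit
    const DPL_KERNEL: u8 = 0 << 5; // callable from ring 0 only
    const DPL_USER: u8 = 3 << 5;   // callable from ring 3
    const INT_GATE: u8 = 0xe;      // 64-bit interrupt gate type

    const FAULT_GATE: u8 = PRESENT | DPL_KERNEL | INT_GATE; // = 0x8e
    const SYSCALL_GATE: u8 = PRESENT | DPL_USER | INT_GATE; // = 0xee

The timer setup is openly approximate: 0x20040 selects periodic delivery on
vector 0x40 (matching the `0x40 => timer_interrupt()` arm in
src/kernel/interrupt.rs), and with divider 16 and the guessed 200 MHz bus
frequency the initial count of 200_000_000 / 16 / 100 targets a 100 Hz tick.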

+ 214 - 0
arch/src/x86_64/interrupt.s

@@ -0,0 +1,214 @@
+.text
+
+#define RAX     0x00
+#define RBX     0x08
+#define RCX     0x10
+#define RDX     0x18
+#define RDI     0x20
+#define RSI     0x28
+#define R8      0x30
+#define R9      0x38
+#define R10     0x40
+#define R11     0x48
+#define R12     0x50
+#define R13     0x58
+#define R14     0x60
+#define R15     0x68
+#define RBP     0x70
+#define INT_NO  0x78
+#define ERRCODE 0x80
+#define RIP     0x88
+#define CS      0x90
+#define FLAGS   0x98
+#define RSP     0xa0
+#define SS      0xa8
+
+.macro movcfi reg, offset
+	mov \reg, \offset(%rsp)
+	.cfi_rel_offset \reg, \offset
+.endm
+
+.macro movrst reg, offset
+	mov \offset(%rsp), \reg
+	.cfi_restore \reg
+.endm
+
+ISR_stub:
+	.cfi_startproc
+	.cfi_signal_frame
+	.cfi_def_cfa_offset 0x18
+	.cfi_offset %rsp, 0x10
+
+	sub $0x78, %rsp
+	.cfi_def_cfa_offset 0x90
+
+	movcfi %rax, RAX
+	movcfi %rbx, RBX
+	movcfi %rcx, RCX
+	movcfi %rdx, RDX
+	movcfi %rdi, RDI
+	movcfi %rsi, RSI
+	movcfi %r8,  R8
+	movcfi %r9,  R9
+	movcfi %r10, R10
+	movcfi %r11, R11
+	movcfi %r12, R12
+	movcfi %r13, R13
+	movcfi %r14, R14
+	movcfi %r15, R15
+	movcfi %rbp, RBP
+
+	mov INT_NO(%rsp), %rax
+	sub $ISR0, %rax
+	shr $3, %rax
+	mov %rax, INT_NO(%rsp)
+
+	mov %rsp, %rbx
+	.cfi_def_cfa_register %rbx
+
+	and $~0xf, %rsp
+	sub $512, %rsp
+	fxsave (%rsp)
+
+	mov %rbx, %rdi
+	mov %rsp, %rsi
+	call interrupt_handler
+
+ISR_stub_restore:
+	fxrstor (%rsp)
+	mov %rbx, %rsp
+	.cfi_def_cfa_register %rsp
+
+	movrst %rax, RAX
+	movrst %rbx, RBX
+	movrst %rcx, RCX
+	movrst %rdx, RDX
+	movrst %rdi, RDI
+	movrst %rsi, RSI
+	movrst %r8,  R8
+	movrst %r9,  R9
+	movrst %r10, R10
+	movrst %r11, R11
+	movrst %r12, R12
+	movrst %r13, R13
+	movrst %r14, R14
+	movrst %r15, R15
+	movrst %rbp, RBP
+
+	add $0x88, %rsp
+	.cfi_def_cfa_offset 0x08
+
+	iretq
+	.cfi_endproc
+
+.altmacro
+.macro build_isr_no_err name
+	.align 8
+	.globl ISR\name
+	.type  ISR\name @function
+	ISR\name:
+		.cfi_startproc
+		.cfi_signal_frame
+		.cfi_def_cfa_offset 0x08
+		.cfi_offset %rsp, 0x10
+
+		.cfi_same_value %rax
+		.cfi_same_value %rbx
+		.cfi_same_value %rcx
+		.cfi_same_value %rdx
+		.cfi_same_value %rdi
+		.cfi_same_value %rsi
+		.cfi_same_value %r8
+		.cfi_same_value %r9
+		.cfi_same_value %r10
+		.cfi_same_value %r11
+		.cfi_same_value %r12
+		.cfi_same_value %r13
+		.cfi_same_value %r14
+		.cfi_same_value %r15
+		.cfi_same_value %rbp
+
+		push %rbp # push placeholder for error code
+		.cfi_def_cfa_offset 0x10
+
+		call ISR_stub
+		.cfi_endproc
+.endm
+
+.altmacro
+.macro build_isr_err name
+	.align 8
+	.globl ISR\name
+	.type  ISR\name @function
+	ISR\name:
+		.cfi_startproc
+		.cfi_signal_frame
+		.cfi_def_cfa_offset 0x10
+		.cfi_offset %rsp, 0x10
+
+		.cfi_same_value %rax
+		.cfi_same_value %rbx
+		.cfi_same_value %rcx
+		.cfi_same_value %rdx
+		.cfi_same_value %rdi
+		.cfi_same_value %rsi
+		.cfi_same_value %r8
+		.cfi_same_value %r9
+		.cfi_same_value %r10
+		.cfi_same_value %r11
+		.cfi_same_value %r12
+		.cfi_same_value %r13
+		.cfi_same_value %r14
+		.cfi_same_value %r15
+		.cfi_same_value %rbp
+
+		call ISR_stub
+		.cfi_endproc
+.endm
+
+build_isr_no_err 0
+build_isr_no_err 1
+build_isr_no_err 2
+build_isr_no_err 3
+build_isr_no_err 4
+build_isr_no_err 5
+build_isr_no_err 6
+build_isr_no_err 7
+build_isr_err    8
+build_isr_no_err 9
+build_isr_err    10
+build_isr_err    11
+build_isr_err    12
+build_isr_err    13
+build_isr_err    14
+build_isr_no_err 15
+build_isr_no_err 16
+build_isr_err    17
+build_isr_no_err 18
+build_isr_no_err 19
+build_isr_no_err 20
+build_isr_err    21
+build_isr_no_err 22
+build_isr_no_err 23
+build_isr_no_err 24
+build_isr_no_err 25
+build_isr_no_err 26
+build_isr_no_err 27
+build_isr_no_err 28
+build_isr_err    29
+build_isr_err    30
+build_isr_no_err 31
+
+.set i, 32
+.rept 0x80+1
+	build_isr_no_err %i
+	.set i, i+1
+.endr
+
+.section .rodata
+
+.align 8
+.globl ISR_START_ADDR
+.type  ISR_START_ADDR @object
+ISR_START_ADDR:
+	.quad ISR0
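
Note: how ISR_stub recovers the vector number: each ISR stub is aligned to 8
bytes and fits in 8 bytes (an optional one-byte `push %rbp` plus a five-byte
`call ISR_stub`), so stub N starts at ISR0 + 8*N. The return address pushed by
that call, sitting at INT_NO(%rsp) after the register saves, points into stub
N, so `sub $ISR0` followed by `shr $3` yields N. On vectors where the CPU
pushes no error code, the `push %rbp` doubles as a placeholder, keeping the
InterruptContext layout uniform across all vectors.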

+ 0 - 0
arch/x86_64/src/io.rs → arch/src/x86_64/io.rs


+ 134 - 0
arch/src/x86_64/mod.rs

@@ -0,0 +1,134 @@
+mod context;
+mod gdt;
+mod init;
+mod interrupt;
+mod io;
+mod user;
+
+pub(self) mod percpu;
+
+pub use self::context::*;
+pub use self::gdt::*;
+pub use self::init::*;
+pub use self::interrupt::*;
+pub use self::io::*;
+pub use self::user::*;
+pub use percpu_macros::define_percpu;
+
+use core::arch::asm;
+
+#[inline(always)]
+pub fn flush_tlb(vaddr: usize) {
+    unsafe {
+        asm!(
+            "invlpg ({})",
+            in(reg) vaddr,
+            options(att_syntax)
+        );
+    }
+}
+
+#[inline(always)]
+pub fn flush_tlb_all() {
+    unsafe {
+        asm!(
+            "mov %cr3, %rax",
+            "mov %rax, %cr3",
+            out("rax") _,
+            options(att_syntax)
+        );
+    }
+}
+
+#[inline(always)]
+pub fn get_root_page_table() -> usize {
+    let cr3: usize;
+    unsafe {
+        asm!(
+            "mov %cr3, {0}",
+            out(reg) cr3,
+            options(att_syntax)
+        );
+    }
+    cr3
+}
+
+#[inline(always)]
+pub fn set_root_page_table(pfn: usize) {
+    unsafe {
+        asm!(
+            "mov {0}, %cr3",
+            in(reg) pfn,
+            options(att_syntax)
+        );
+    }
+}
+
+#[inline(always)]
+pub fn get_page_fault_address() -> usize {
+    let cr2: usize;
+    unsafe {
+        asm!(
+            "mov %cr2, {}",
+            out(reg) cr2,
+            options(att_syntax)
+        );
+    }
+    cr2
+}
+
+#[inline(always)]
+pub fn halt() {
+    unsafe {
+        asm!("hlt", options(att_syntax, nostack));
+    }
+}
+
+#[inline(always)]
+pub fn pause() {
+    unsafe {
+        asm!("pause", options(att_syntax, nostack));
+    }
+}
+
+#[inline(always)]
+pub fn freeze() -> ! {
+    loop {
+        interrupt::disable_irqs();
+        halt();
+    }
+}
+
+#[inline(always)]
+pub fn rdmsr(msr: u32) -> u64 {
+    let edx: u32;
+    let eax: u32;
+
+    unsafe {
+        asm!(
+            "rdmsr",
+            in("ecx") msr,
+            out("eax") eax,
+            out("edx") edx,
+            options(att_syntax),
+        );
+    }
+
+    (edx as u64) << 32 | eax as u64
+}
+
+#[inline(always)]
+pub fn wrmsr(msr: u32, value: u64) {
+    let eax = value as u32;
+    let edx = (value >> 32) as u32;
+
+    unsafe {
+        asm!(
+            "wrmsr",
+            in("ecx") msr,
+            in("eax") eax,
+            in("edx") edx,
+            options(att_syntax),
+        );
+    }
+}

+ 16 - 0
arch/src/x86_64/percpu.rs

@@ -0,0 +1,16 @@
+use super::wrmsr;
+use core::{arch::asm, ptr::NonNull};
+
+fn save_percpu_pointer(percpu_area_base: NonNull<u8>) {
+    wrmsr(0xC0000101, percpu_area_base.as_ptr() as u64);
+}
+
+pub unsafe fn init_percpu_area_thiscpu(percpu_area_base: NonNull<u8>) {
+    save_percpu_pointer(percpu_area_base);
+
+    asm!(
+        "movq {}, %gs:0",
+        in(reg) percpu_area_base.as_ptr(),
+        options(att_syntax)
+    );
+}
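
Note: two stores happen here. The wrmsr to 0xC0000101 (IA32_GS_BASE) points
%gs-relative addressing at the per-CPU area, and `movq {}, %gs:0` then writes
the base address into the area's first slot as a self-pointer. That
self-pointer is what percpu-macros reads back with `mov %gs:0, {address}`
(see arch/percpu-macros/src/arch.rs above), avoiding an rdmsr on every per-CPU
access.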

+ 54 - 0
arch/src/x86_64/user.rs

@@ -0,0 +1,54 @@
+use core::pin::Pin;
+
+use super::{CPUStatus, GDTEntry};
+
+#[derive(Debug, Clone)]
+pub enum UserTLS {
+    /// TODO: This is not used yet.
+    #[allow(dead_code)]
+    TLS64(u64),
+    TLS32 {
+        base: u64,
+        desc: GDTEntry,
+    },
+}
+
+impl UserTLS {
+    /// # Return
+    /// Returns the TLS descriptor and the index of the TLS segment.
+    pub fn new32(base: u32, limit: u32, is_limit_in_pages: bool) -> (Self, u32) {
+        let flags = if is_limit_in_pages { 0xc } else { 0x4 };
+
+        (
+            Self::TLS32 {
+                base: base as u64,
+                desc: GDTEntry::new(base, limit, 0xf2, flags),
+            },
+            7,
+        )
+    }
+
+    pub fn load(&self, cpu_status: Pin<&mut CPUStatus>) {
+        match self {
+            Self::TLS64(base) => {
+                const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
+                super::wrmsr(IA32_KERNEL_GS_BASE, *base);
+            }
+            Self::TLS32 { base, desc } => {
+                unsafe {
+                    // SAFETY: We don't move the CPUStatus object.
+                    let cpu_mut = cpu_status.get_unchecked_mut();
+                    cpu_mut.set_tls32(*desc);
+                }
+
+                const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
+                super::wrmsr(IA32_KERNEL_GS_BASE, *base);
+            }
+        }
+    }
+}
+
+pub unsafe fn load_interrupt_stack(cpu_status: Pin<&mut CPUStatus>, stack: u64) {
+    // SAFETY: We don't move the CPUStatus object.
+    cpu_status.get_unchecked_mut().set_rsp0(stack);
+}

+ 0 - 6
arch/x86_64/Cargo.toml

@@ -1,6 +0,0 @@
-[package]
-name = "x86_64"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]

+ 0 - 27
arch/x86_64/src/interrupt.rs

@@ -1,27 +0,0 @@
-use core::arch::asm;
-
-pub fn enable() {
-    unsafe {
-        asm!("sti");
-    }
-}
-
-pub fn disable() {
-    unsafe {
-        asm!("cli");
-    }
-}
-
-pub fn lidt(base: usize, limit: u16) {
-    let mut idt_descriptor = [0u16; 5];
-
-    idt_descriptor[0] = limit;
-    idt_descriptor[1] = base as u16;
-    idt_descriptor[2] = (base >> 16) as u16;
-    idt_descriptor[3] = (base >> 32) as u16;
-    idt_descriptor[4] = (base >> 48) as u16;
-
-    unsafe {
-        asm!("lidt ({})", in(reg) &idt_descriptor, options(att_syntax));
-    }
-}

+ 0 - 70
arch/x86_64/src/lib.rs

@@ -1,70 +0,0 @@
-#![no_std]
-
-pub mod vm {
-    use core::arch::asm;
-
-    #[inline(always)]
-    pub fn invlpg(vaddr: usize) {
-        unsafe {
-            asm!(
-                "invlpg ({})",
-                in(reg) vaddr,
-                options(att_syntax)
-            );
-        }
-    }
-
-    #[inline(always)]
-    pub fn invlpg_all() {
-        unsafe {
-            asm!(
-                "mov %cr3, %rax",
-                "mov %rax, %cr3",
-                out("rax") _,
-                options(att_syntax)
-            );
-        }
-    }
-
-    #[inline(always)]
-    pub fn get_cr3() -> usize {
-        let cr3: usize;
-        unsafe {
-            asm!(
-                "mov %cr3, {0}",
-                out(reg) cr3,
-                options(att_syntax)
-            );
-        }
-        cr3
-    }
-
-    #[inline(always)]
-    pub fn set_cr3(pfn: usize) {
-        unsafe {
-            asm!(
-                "mov {0}, %cr3",
-                in(reg) pfn,
-                options(att_syntax)
-            );
-        }
-    }
-
-    #[inline(always)]
-    pub fn get_cr2() -> usize {
-        let cr2: usize;
-        unsafe {
-            asm!(
-                "mov %cr2, {}",
-                out(reg) cr2,
-                options(att_syntax)
-            );
-        }
-        cr2
-    }
-}
-
-pub mod gdt;
-pub mod interrupt;
-pub mod io;
-pub mod task;

+ 0 - 172
arch/x86_64/src/task.rs

@@ -1,172 +0,0 @@
-use core::arch::{asm, global_asm};
-
-use crate::interrupt;
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-struct SP {
-    low: u32,
-    high: u32,
-}
-
-#[repr(C)]
-pub struct TSS {
-    _reserved1: u32,
-    rsp: [SP; 3],
-    _reserved2: u32,
-    _reserved3: u32,
-    ist: [SP; 7],
-    _reserved4: u32,
-    _reserved5: u32,
-    _reserved6: u16,
-    iomap_base: u16,
-}
-
-impl TSS {
-    pub fn new() -> Self {
-        Self {
-            _reserved1: 0,
-            rsp: [SP { low: 0, high: 0 }; 3],
-            _reserved2: 0,
-            _reserved3: 0,
-            ist: [SP { low: 0, high: 0 }; 7],
-            _reserved4: 0,
-            _reserved5: 0,
-            _reserved6: 0,
-            iomap_base: 0,
-        }
-    }
-
-    pub fn set_rsp0(&mut self, rsp: u64) {
-        self.rsp[0].low = rsp as u32;
-        self.rsp[0].high = (rsp >> 32) as u32;
-    }
-}
-
-#[inline(always)]
-pub fn halt() {
-    unsafe {
-        asm!("hlt", options(att_syntax, nostack));
-    }
-}
-
-#[inline(always)]
-pub fn pause() {
-    unsafe {
-        asm!("pause", options(att_syntax, nostack));
-    }
-}
-
-#[inline(always)]
-pub fn freeze() -> ! {
-    loop {
-        interrupt::disable();
-        halt();
-    }
-}
-
-#[inline(always)]
-pub fn rdmsr(msr: u32) -> u64 {
-    let edx: u32;
-    let eax: u32;
-
-    unsafe {
-        asm!(
-            "rdmsr",
-            in("ecx") msr,
-            out("eax") eax,
-            out("edx") edx,
-            options(att_syntax),
-        );
-    }
-
-    (edx as u64) << 32 | eax as u64
-}
-
-#[inline(always)]
-pub fn wrmsr(msr: u32, value: u64) {
-    let eax = value as u32;
-    let edx = (value >> 32) as u32;
-
-    unsafe {
-        asm!(
-            "wrmsr",
-            in("ecx") msr,
-            in("eax") eax,
-            in("edx") edx,
-            options(att_syntax),
-        );
-    }
-}
-
-global_asm!(
-    r"
-    .macro movcfi reg, offset
-        mov \reg, \offset(%rsp)
-        .cfi_rel_offset \reg, \offset
-    .endm
-
-    .macro movrst reg, offset
-        mov \offset(%rsp), \reg
-        .cfi_restore \reg
-    .endm
-
-    .globl __context_switch_light
-    .type __context_switch_light @function
-    __context_switch_light:
-    .cfi_startproc
-
-        pushf
-    .cfi_def_cfa_offset 0x10
-
-        sub $0x38, %rsp  # extra 8 bytes to align to 16 bytes
-    .cfi_def_cfa_offset 0x48
-
-        movcfi %rbx, 0x08
-        movcfi %rbp, 0x10
-        movcfi %r12, 0x18
-        movcfi %r13, 0x20
-        movcfi %r14, 0x28
-        movcfi %r15, 0x30
-
-        push (%rdi)      # save sp of previous stack frame of current
-                         # acts as saving bp
-    .cfi_def_cfa_offset 0x50
-
-        mov %rsp, (%rdi) # save sp of current stack
-        mov (%rsi), %rsp # load sp of target stack
-
-        pop (%rsi)       # load sp of previous stack frame of target
-                         # acts as restoring previous bp
-    .cfi_def_cfa_offset 0x48
-
-        pop %rax         # align to 16 bytes
-    .cfi_def_cfa_offset 0x40
-
-        mov 0x28(%rsp), %r15
-        mov 0x20(%rsp), %r14
-        mov 0x18(%rsp), %r13
-        mov 0x10(%rsp), %r12
-        mov 0x08(%rsp), %rbp
-        mov 0x00(%rsp), %rbx
-
-        add $0x30, %rsp
-    .cfi_def_cfa_offset 0x10
-
-        popf
-    .cfi_def_cfa_offset 0x08
-
-        ret
-    .cfi_endproc
-    ",
-    options(att_syntax),
-);
-
-extern "C" {
-    fn __context_switch_light(current_task_sp: *mut usize, next_task_sp: *mut usize);
-}
-
-#[inline(always)]
-pub fn context_switch_light(current_task_sp: *mut usize, next_task_sp: *mut usize) {
-    unsafe { __context_switch_light(current_task_sp, next_task_sp) }
-}

+ 1 - 1
global_find.sh

@@ -5,7 +5,7 @@
 do_find()
 {
     for ext in $2; do
-        find src include -name "*.$ext" -exec grep -n --color=always -H -i "$1" {} \; -exec echo "" \;
+        find arch src include -name "*.$ext" -exec grep -n --color=always -H -i "$1" {} \; -exec echo "" \;
     done
 }
 

+ 0 - 74
src/boot.s

@@ -321,77 +321,3 @@ start_64bit:
     cli
     hlt
     jmp .L64bit_hlt
-
-.section .stage1.smp
-.code16
-
-.globl ap_bootstrap
-.type ap_bootstrap, @function
-ap_bootstrap:
-	ljmp $0x0, $.Lap1
-
-.Lap1:
-    # we use a shared gdt for now
-	lgdt shared_gdt_desc
-
-    # set msr
-    mov $0xc0000080, %ecx
-    rdmsr
-    or $0x901, %eax # set LME, NXE, SCE
-    wrmsr
-
-    # set cr4
-    mov %cr4, %eax
-    or $0xa0, %eax # set PAE, PGE
-    mov %eax, %cr4
-
-    # load new page table
-    mov $KERNEL_PML4, %eax
-    mov %eax, %cr3
-
-    mov %cr0, %eax
-    // SET PE, WP, PG
-    or $0x80010001, %eax
-    mov %eax, %cr0
-
-	ljmp $0x08, $.Lap_bootstrap_end
-
-.align 16
-shared_gdt_desc:
-	.8byte 0x0000000000005f
-
-.code64
-.Lap_bootstrap_end:
-    mov $0x10, %ax
-	mov %ax, %ds
-	mov %ax, %es
-	mov %ax, %ss
-
-	xor %rsp, %rsp
-	xor %rax, %rax
-	inc %rax
-1:
-	xchg %rax, BOOT_SEMAPHORE
-	cmp $0, %rax
-	je 1f
-	pause
-	jmp 1b
-
-1:
-	mov BOOT_STACK, %rsp # Acquire
-	cmp $0, %rsp
-	jne 1f
-	pause
-	jmp 1b
-
-1:
-	xor %rax, %rax
-	mov %rax, BOOT_STACK # Release
-	xchg %rax, BOOT_SEMAPHORE
-
-	xor %rbp, %rbp
-	mov %rsp, %rdi # stack area start address as the first argument
-
-	add $0x200000, %rsp # kernel stack order 9
-	push %rbp # NULL return address
-	jmp ap_entry

+ 2 - 2
src/driver.rs

@@ -13,10 +13,10 @@ impl Port8 {
     }
 
     pub fn read(&self) -> u8 {
-        arch::io::inb(self.no)
+        arch::inb(self.no)
     }
 
     pub fn write(&self, data: u8) {
-        arch::io::outb(self.no, data)
+        arch::outb(self.no, data)
     }
 }

+ 8 - 0
src/elf.rs

@@ -14,6 +14,7 @@ use crate::{
 };
 
 #[repr(u8)]
+#[allow(dead_code)]
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum ElfFormat {
     Elf32 = 1,
@@ -21,6 +22,7 @@ pub enum ElfFormat {
 }
 
 #[repr(u8)]
+#[allow(dead_code)]
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum ElfEndian {
     Little = 1,
@@ -28,6 +30,7 @@ pub enum ElfEndian {
 }
 
 #[repr(u8)]
+#[allow(dead_code)]
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum ElfABI {
     // SystemV = 0,
@@ -35,6 +38,7 @@ pub enum ElfABI {
 }
 
 #[repr(u16)]
+#[allow(dead_code)]
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum ElfType {
     Relocatable = 1,
@@ -44,6 +48,7 @@ pub enum ElfType {
 }
 
 #[repr(u16)]
+#[allow(dead_code)]
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum ElfArch {
     X86 = 0x03,
@@ -71,6 +76,7 @@ bitflags! {
     }
 }
 
+#[allow(dead_code)]
 #[derive(Default, Clone, Copy, PartialEq, Eq)]
 pub enum Elf32PhType {
     #[default]
@@ -88,6 +94,7 @@ pub enum Elf32PhType {
     Hiproc = 0x7fffffff,
 }
 
+#[allow(dead_code)]
 #[derive(Default, Clone, Copy, PartialEq, Eq)]
 pub enum Elf32ShType {
     #[default]
@@ -171,6 +178,7 @@ pub struct Elf32ShEntry {
     pub entry_size: u32,
 }
 
+#[allow(dead_code)]
 pub struct ParsedElf32 {
     entry: u32,
     file: Arc<Dentry>,

+ 1 - 1
src/kernel.rs

@@ -1,7 +1,7 @@
-pub mod arch;
 pub mod block;
 pub mod console;
 pub mod constants;
+pub mod cpu;
 pub mod interrupt;
 pub mod mem;
 pub mod syscall;

+ 0 - 5
src/kernel/arch.rs

@@ -1,5 +0,0 @@
-#[cfg(target_arch = "x86_64")]
-pub mod x86_64;
-
-#[cfg(target_arch = "x86_64")]
-pub use x86_64::*;

+ 0 - 82
src/kernel/arch/x86_64.rs

@@ -1,82 +0,0 @@
-pub mod init;
-pub mod interrupt;
-
-use arch::x86_64::{gdt::GDT, task::TSS};
-
-// TODO!!!: This can be stored in the percpu area.
-//          But we need to implement a guard that ensures that preemption is disabled
-//          while we are accessing the percpu variables.
-#[arch::define_percpu]
-static GDT_OBJECT: Option<GDT> = None;
-
-#[arch::define_percpu]
-static TSS_OBJECT: Option<TSS> = None;
-
-pub mod user {
-    use crate::sync::preempt;
-    use arch::x86_64::gdt::GDTEntry;
-
-    pub struct InterruptStack(pub u64);
-
-    #[derive(Debug, Clone)]
-    pub enum TLS {
-        /// TODO: This is not used yet.
-        #[allow(dead_code)]
-        TLS64(u64),
-        TLS32 {
-            base: u64,
-            desc: GDTEntry,
-        },
-    }
-
-    impl TLS {
-        /// # Return
-        /// Returns the TLS descriptor and the index of the TLS segment.
-        pub fn new32(base: u32, limit: u32, is_limit_in_pages: bool) -> (Self, u32) {
-            let flags = if is_limit_in_pages { 0xc } else { 0x4 };
-
-            (
-                TLS::TLS32 {
-                    base: base as u64,
-                    desc: GDTEntry::new(base, limit, 0xf2, flags),
-                },
-                7,
-            )
-        }
-
-        pub fn load(&self) {
-            match self {
-                TLS::TLS64(base) => {
-                    const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
-                    arch::x86_64::task::wrmsr(IA32_KERNEL_GS_BASE, *base);
-                }
-                TLS::TLS32 { base, desc } => {
-                    preempt::disable();
-                    let gdt = unsafe {
-                        super::GDT_OBJECT
-                            .as_mut()
-                            .as_mut()
-                            .expect("GDT should be valid")
-                    };
-                    gdt.set_tls32(*desc);
-                    preempt::enable();
-
-                    const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
-                    arch::x86_64::task::wrmsr(IA32_KERNEL_GS_BASE, *base);
-                }
-            }
-        }
-    }
-
-    pub fn load_interrupt_stack(stack: InterruptStack) {
-        preempt::disable();
-        let tss = unsafe {
-            super::TSS_OBJECT
-                .as_mut()
-                .as_mut()
-                .expect("TSS should be valid")
-        };
-        tss.set_rsp0(stack.0);
-        preempt::enable();
-    }
-}

+ 0 - 126
src/kernel/arch/x86_64/init.rs

@@ -1,126 +0,0 @@
-use super::{interrupt::setup_idt, GDT_OBJECT, TSS_OBJECT};
-use crate::{
-    kernel::{
-        arch::interrupt::APIC_BASE,
-        mem::{paging::Page, phys::PhysPtr as _},
-        smp,
-        task::{ProcessList, Scheduler, Thread},
-    },
-    println_debug, println_info,
-    sync::preempt,
-};
-use alloc::{format, sync::Arc};
-use arch::{
-    interrupt,
-    task::pause,
-    x86_64::{gdt::GDT, task::TSS},
-};
-use core::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
-
-unsafe fn init_gdt_tss_thiscpu() {
-    preempt::disable();
-    let gdt_ref = unsafe { GDT_OBJECT.as_mut() };
-    let tss_ref = unsafe { TSS_OBJECT.as_mut() };
-    *gdt_ref = Some(GDT::new());
-    *tss_ref = Some(TSS::new());
-
-    if let Some(gdt) = gdt_ref.as_mut() {
-        if let Some(tss) = tss_ref.as_mut() {
-            gdt.set_tss(tss as *mut _ as u64);
-        } else {
-            panic!("TSS is not initialized");
-        }
-
-        unsafe { gdt.load() };
-    } else {
-        panic!("GDT is not initialized");
-    }
-
-    preempt::enable();
-}
-
-/// Initialization routine for all CPUs.
-pub unsafe fn init_cpu() {
-    arch::x86_64::io::enable_sse();
-
-    let area = smp::alloc_percpu_area();
-    smp::set_percpu_area(area);
-    init_gdt_tss_thiscpu();
-
-    setup_idt();
-
-    APIC_BASE.spurious().write(0x1ff);
-    APIC_BASE.task_priority().write(0);
-    APIC_BASE.timer_divide().write(0x3); // Divide by 16
-    APIC_BASE.timer_register().write(0x20040);
-
-    // TODO: Get the bus frequency from...?
-    let freq = 800;
-    let count = freq * 1_000_000 / 16 / 100;
-    APIC_BASE.timer_initial_count().write(count as u32);
-
-    let cpu = CPU_COUNT.fetch_add(1, Ordering::Relaxed);
-    if cpu != 0 {
-        // Application processor
-        println_debug!("AP{} started", cpu);
-    }
-}
-
-#[no_mangle]
-pub static BOOT_SEMAPHORE: AtomicU32 = AtomicU32::new(0);
-#[no_mangle]
-pub static BOOT_STACK: AtomicUsize = AtomicUsize::new(0);
-
-pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0);
-
-#[no_mangle]
-pub unsafe extern "C" fn ap_entry(stack_start: u64) {
-    init_cpu();
-
-    let idle_process = ProcessList::get()
-        .try_find_process(0)
-        .expect("Idle process must exist");
-
-    let idle_thread_name = format!("[kernel idle#AP{}]", 0);
-    let idle_thread = Thread::new_for_init(Arc::from(idle_thread_name.as_bytes()), &idle_process);
-    ProcessList::get().add_thread(&idle_thread);
-    Scheduler::set_idle(idle_thread.clone());
-    Scheduler::set_current(idle_thread);
-
-    preempt::disable();
-    interrupt::enable();
-
-    // TODO!!!!!: Free the stack after having switched to idle task.
-    arch::task::context_switch_light(
-        stack_start as *mut _, // We will never come back
-        unsafe { Scheduler::idle_task().get_sp_ptr() },
-    );
-    arch::task::freeze()
-}
-
-pub unsafe fn bootstrap_cpus() {
-    let icr = APIC_BASE.interrupt_command();
-
-    icr.write(0xc4500);
-    while icr.read() & 0x1000 != 0 {
-        pause();
-    }
-
-    icr.write(0xc4601);
-    while icr.read() & 0x1000 != 0 {
-        pause();
-    }
-
-    while CPU_COUNT.load(Ordering::Acquire) != 4 {
-        if BOOT_STACK.load(Ordering::Acquire) == 0 {
-            let page = Page::alloc_many(9);
-            let stack_start = page.as_cached().as_ptr::<()>() as usize;
-            core::mem::forget(page);
-
-            BOOT_STACK.store(stack_start, Ordering::Release);
-        }
-        pause();
-    }
-
-    println_info!("Processors startup finished");
-}

+ 0 - 129
src/kernel/arch/x86_64/interrupt.rs

@@ -1,129 +0,0 @@
-use crate::kernel::mem::phys::{CachedPP, PhysPtr as _};
-use arch::task::rdmsr;
-use lazy_static::lazy_static;
-
-extern "C" {
-    static ISR_START_ADDR: usize;
-}
-
-#[repr(C)]
-#[derive(Clone, Copy)]
-struct IDTEntry {
-    offset_low: u16,
-    selector: u16,
-
-    interrupt_stack: u8,
-    attributes: u8,
-
-    offset_mid: u16,
-    offset_high: u32,
-    reserved: u32,
-}
-
-impl IDTEntry {
-    const fn new(offset: usize, selector: u16, attributes: u8) -> Self {
-        Self {
-            offset_low: offset as u16,
-            selector,
-            interrupt_stack: 0,
-            attributes,
-            offset_mid: (offset >> 16) as u16,
-            offset_high: (offset >> 32) as u32,
-            reserved: 0,
-        }
-    }
-
-    const fn null() -> Self {
-        Self {
-            offset_low: 0,
-            selector: 0,
-            interrupt_stack: 0,
-            attributes: 0,
-            offset_mid: 0,
-            offset_high: 0,
-            reserved: 0,
-        }
-    }
-}
-
-pub struct APICReg(*mut u32);
-pub struct APICRegs {
-    base: CachedPP,
-}
-
-impl APICReg {
-    fn new(pointer: *mut u32) -> Self {
-        Self(pointer)
-    }
-
-    pub fn read(&self) -> u32 {
-        unsafe { self.0.read_volatile() }
-    }
-
-    pub fn write(&self, value: u32) {
-        unsafe { self.0.write_volatile(value) }
-    }
-}
-
-impl APICRegs {
-    pub fn spurious(&self) -> APICReg {
-        APICReg::new(self.base.offset(0xf0).as_ptr())
-    }
-
-    pub fn task_priority(&self) -> APICReg {
-        APICReg::new(self.base.offset(0x80).as_ptr())
-    }
-
-    pub fn end_of_interrupt(&self) {
-        APICReg::new(self.base.offset(0xb0).as_ptr()).write(0)
-    }
-
-    pub fn interrupt_command(&self) -> APICReg {
-        APICReg::new(self.base.offset(0x300).as_ptr())
-    }
-
-    pub fn timer_register(&self) -> APICReg {
-        APICReg::new(self.base.offset(0x320).as_ptr())
-    }
-
-    pub fn timer_initial_count(&self) -> APICReg {
-        APICReg::new(self.base.offset(0x380).as_ptr())
-    }
-
-    pub fn timer_current_count(&self) -> APICReg {
-        APICReg::new(self.base.offset(0x390).as_ptr())
-    }
-
-    pub fn timer_divide(&self) -> APICReg {
-        APICReg::new(self.base.offset(0x3e0).as_ptr())
-    }
-}
-
-lazy_static! {
-    static ref IDT: [IDTEntry; 256] = core::array::from_fn(|idx| match idx {
-        0..0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0x8e),
-        0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0xee),
-        _ => IDTEntry::null(),
-    });
-    pub static ref APIC_BASE: APICRegs = {
-        let apic_base = rdmsr(0x1b);
-        assert_eq!(apic_base & 0x800, 0x800, "LAPIC not enabled");
-        assert_eq!(apic_base & 0x100, 0x100, "Is not bootstrap processor");
-
-        let apic_base = apic_base & !0xfff;
-        APICRegs {
-            base: CachedPP::new(apic_base as usize),
-        }
-    };
-}
-
-pub fn setup_idt() {
-    arch::x86_64::interrupt::lidt(
-        IDT.as_ptr() as usize,
-        (size_of::<IDTEntry>() * 256 - 1) as u16,
-    );
-}
-
-pub fn end_of_interrupt() {
-    APIC_BASE.end_of_interrupt()
-}

+ 30 - 0
src/kernel/cpu.rs

@@ -0,0 +1,30 @@
+use core::{pin::Pin, ptr::NonNull};
+
+use arch::CPUStatus;
+
+use super::mem::{paging::Page, phys::PhysPtr as _};
+
+#[arch::define_percpu]
+static CPU_STATUS: Option<CPUStatus> = None;
+
+/// # Safety
+/// This function is unsafe because it needs preemption to be disabled.
+pub unsafe fn current_cpu() -> Pin<&'static mut CPUStatus> {
+    // SAFETY: `CPU_STATUS` is global static and initialized only once.
+    unsafe { Pin::new_unchecked(CPU_STATUS.as_mut().as_mut().unwrap()) }
+}
+
+pub unsafe fn init_thiscpu() {
+    CPU_STATUS.set(Some(arch::CPUStatus::new_thiscpu(|layout| {
+        // TODO: Use page size defined in `arch`.
+        let page_count = (layout.size() + 0x1000 - 1) / 0x1000;
+        let page = Page::alloc_ceil(page_count);
+        let pointer = page.as_cached().as_ptr();
+        core::mem::forget(page);
+
+        NonNull::new(pointer).expect("Allocated page pfn should be non-null")
+    })));
+
+    // SAFETY: `CPU_STATUS` is global static and initialized only once.
+    current_cpu().init();
+}
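
Note: this is the glue between the pieces above: init_thiscpu allocates pages
for the per-CPU area (leaked with mem::forget, since the area lives as long as
the CPU), and CPUStatus::new_thiscpu copies the per-CPU template section into
it and installs it via init_percpu_area_thiscpu. CPU_STATUS is itself a
per-CPU variable, so once initialized each core finds its own CPUStatus
through %gs.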

+ 11 - 5
src/kernel/interrupt.rs

@@ -1,10 +1,12 @@
 use alloc::sync::Arc;
 
+use arch::InterruptContext;
 use lazy_static::lazy_static;
 
-use crate::bindings::root::{interrupt_stack, mmx_registers, EINVAL};
+use crate::bindings::root::{mmx_registers, EINVAL};
 use crate::{driver::Port8, prelude::*};
 
+use super::cpu::current_cpu;
 use super::mem::handle_page_fault;
 use super::syscall::handle_syscall32;
 use super::task::{ProcessList, Signal};
@@ -34,7 +36,7 @@ fn irq_handler(irqno: usize) {
     }
 }
 
-fn fault_handler(int_stack: &mut interrupt_stack) {
+fn fault_handler(int_stack: &mut InterruptContext) {
     match int_stack.int_no {
         // Invalid Op or Double Fault
         14 => handle_page_fault(int_stack),
@@ -45,7 +47,7 @@ fn fault_handler(int_stack: &mut interrupt_stack) {
 }
 
 #[no_mangle]
-pub extern "C" fn interrupt_handler(int_stack: *mut interrupt_stack, mmxregs: *mut mmx_registers) {
+pub extern "C" fn interrupt_handler(int_stack: *mut InterruptContext, mmxregs: *mut mmx_registers) {
     let int_stack = unsafe { &mut *int_stack };
     let mmxregs = unsafe { &mut *mmxregs };
 
@@ -53,7 +55,7 @@ pub extern "C" fn interrupt_handler(int_stack: *mut interrupt_stack, mmxregs: *m
         // Fault
         0..0x20 => fault_handler(int_stack),
         // Syscall
-        0x80 => handle_syscall32(int_stack.regs.rax as usize, int_stack, mmxregs),
+        0x80 => handle_syscall32(int_stack.rax as usize, int_stack, mmxregs),
         // Timer
         0x40 => timer_interrupt(),
         // IRQ
@@ -75,7 +77,6 @@ where
 }
 
 pub fn init() -> KResult<()> {
-    // TODO: Move this to `arch`
     // Initialize PIC
     PIC1_COMMAND.write(0x11); // edge trigger mode
     PIC1_DATA.write(0x20); // IRQ 0-7 offset
@@ -93,3 +94,8 @@ pub fn init() -> KResult<()> {
 
     Ok(())
 }
+
+pub fn end_of_interrupt() {
+    // SAFETY: We only use this function in irq context, where preemption is disabled.
+    unsafe { current_cpu() }.interrupt.end_of_interrupt();
+}

+ 8 - 8
src/kernel/mem/mm_list/page_fault.rs

@@ -1,8 +1,8 @@
+use arch::InterruptContext;
 use bindings::kernel::mem::paging::pfn_to_page;
 use bindings::{PA_A, PA_ANON, PA_COW, PA_MMAP, PA_P, PA_RW};
 use bitflags::bitflags;
 
-use crate::bindings::root::interrupt_stack;
 use crate::kernel::mem::paging::{Page, PageBuffer};
 use crate::kernel::mem::phys::{CachedPP, PhysPtr};
 use crate::kernel::mem::{Mapping, VRange};
@@ -34,7 +34,7 @@ struct FixEntry {
 impl MMList {
     fn handle_page_fault(
         &self,
-        int_stack: &mut interrupt_stack,
+        int_stack: &mut InterruptContext,
         addr: VAddr,
         error: PageFaultError,
     ) -> Result<(), Signal> {
@@ -157,8 +157,8 @@ extern "C" {
 /// Try to fix the page fault by jumping to the `error` address.
 ///
 /// Panic if we can't find the `ip` in the fix list.
-fn try_page_fault_fix(int_stack: &mut interrupt_stack, addr: VAddr) {
-    let ip = int_stack.v_rip as u64;
+fn try_page_fault_fix(int_stack: &mut InterruptContext, addr: VAddr) {
+    let ip = int_stack.rip as u64;
     // TODO: Use `op_type` to fix.
 
     // SAFETY: `FIX_START` and `FIX_END` are defined in the linker script in `.rodata` section.
@@ -171,7 +171,7 @@ fn try_page_fault_fix(int_stack: &mut interrupt_stack, addr: VAddr) {
 
     for entry in entries.iter() {
         if ip >= entry.start && ip < entry.start + entry.length {
-            int_stack.v_rip = entry.jump_address as usize;
+            int_stack.rip = entry.jump_address as u64;
             return;
         }
     }
@@ -186,9 +186,9 @@ fn kernel_page_fault_die(vaddr: VAddr, ip: usize) -> ! {
     )
 }
 
-pub fn handle_page_fault(int_stack: &mut interrupt_stack) {
+pub fn handle_page_fault(int_stack: &mut InterruptContext) {
     let error = PageFaultError::from_bits_truncate(int_stack.error_code);
-    let vaddr = VAddr(arch::x86_64::vm::get_cr2());
+    let vaddr = VAddr(arch::get_page_fault_address());
 
     let result = Thread::current()
         .process
@@ -199,7 +199,7 @@ pub fn handle_page_fault(int_stack: &mut interrupt_stack) {
         println_debug!(
             "Page fault on {:#x} in user space at {:#x}",
             vaddr.0,
-            int_stack.v_rip
+            int_stack.rip
         );
         ProcessList::kill_current(signal)
     }

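The fixup walk above scans the `[FIX_START, FIX_END)` table for an entry whose code range contains the faulting `rip` and redirects execution to its `jump_address`. A self-contained model of that lookup — `FixEntry` here mirrors only the fields this hunk touches, and the table contents are invented:

    #[derive(Clone, Copy)]
    struct FixEntry {
        start: u64,
        length: u64,
        jump_address: u64,
    }

    /// Return the fixup target for `ip` if it falls inside a registered range.
    fn find_fix(entries: &[FixEntry], ip: u64) -> Option<u64> {
        entries
            .iter()
            .find(|e| ip >= e.start && ip < e.start + e.length)
            .map(|e| e.jump_address)
    }

    fn main() {
        // Invented table: one faultable user-copy loop at 0x1000..0x1020.
        let table = [FixEntry { start: 0x1000, length: 0x20, jump_address: 0x2000 }];
        assert_eq!(find_fix(&table, 0x1010), Some(0x2000));
        assert_eq!(find_fix(&table, 0x3000), None); // would hit kernel_page_fault_die
    }
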
+ 6 - 6
src/kernel/mem/page_table.rs

@@ -221,7 +221,7 @@ impl PageTable {
     }
 
     pub fn switch(&self) {
-        arch::vm::switch_page_table(self.page.as_phys())
+        arch::set_root_page_table(self.page.as_phys())
     }
 
     pub fn unmap(&self, area: &MMArea) {
@@ -229,7 +229,7 @@ impl PageTable {
         let use_invlpg = range.len() / 4096 < 4;
         let iter = self.iter_user(range).unwrap();
 
-        if self.page.as_phys() != arch::vm::current_page_table() {
+        if self.page.as_phys() != arch::get_root_page_table() {
             for pte in iter {
                 pte.take();
             }
@@ -241,19 +241,19 @@ impl PageTable {
                 pte.take();
 
                 let pfn = range.start().floor().0 + offset_pages * 4096;
-                arch::vm::invlpg(pfn);
+                arch::flush_tlb(pfn);
             }
         } else {
             for pte in iter {
                 pte.take();
             }
-            arch::vm::invlpg_all();
+            arch::flush_tlb_all();
         }
     }
 
     pub fn lazy_invalidate_tlb_all(&self) {
-        if self.page.as_phys() == arch::vm::current_page_table() {
-            arch::vm::invlpg_all();
+        if self.page.as_phys() == arch::get_root_page_table() {
+            arch::flush_tlb_all();
         }
     }
 

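`unmap` now chooses per-page invalidation for fewer than four pages and a full flush otherwise, and skips flushing entirely when the table isn't the active one. The new `arch::flush_tlb`/`arch::flush_tlb_all` presumably wrap the usual x86_64 primitives; a sketch of those shapes (ring 0 only, and not necessarily this repo's implementation in `arch/src/x86_64`):

    use core::arch::asm;

    /// Invalidate the TLB entry covering one virtual address.
    unsafe fn flush_tlb(vaddr: usize) {
        asm!("invlpg ({})", in(reg) vaddr, options(att_syntax, nostack, preserves_flags));
    }

    /// Flush all non-global TLB entries by reloading CR3.
    unsafe fn flush_tlb_all() {
        let cr3: u64;
        asm!(
            "mov %cr3, {r}",
            "mov {r}, %cr3",
            r = out(reg) cr3,
            options(att_syntax, nostack, preserves_flags),
        );
        let _ = cr3;
    }
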
+ 59 - 0
src/kernel/mem/paging.rs

@@ -10,11 +10,59 @@ use core::fmt;
 
 use super::phys::PhysPtr;
 
+fn msb(x: u64) -> u64 {
+    // Round `x` down to a power of two by isolating its most significant bit.
+    let mut x = x;
+    x |= x >> 1;
+    x |= x >> 2;
+    x |= x >> 4;
+    x |= x >> 8;
+    x |= x >> 16;
+    x |= x >> 32;
+    x -= x >> 1;
+    x
+}
+
+/// 0-based position of the most significant set bit: `msb_position(1) == Some(0)`.
+fn msb_position(x: u64) -> Option<u32> {
+    if x == 0 {
+        return None;
+    }
+
+    let mut pos = 0;
+    let mut x = x;
+    if x >= 1 << 32 {
+        x >>= 32;
+        pos += 32;
+    }
+    if x >= 1 << 16 {
+        x >>= 16;
+        pos += 16;
+    }
+    if x >= 1 << 8 {
+        x >>= 8;
+        pos += 8;
+    }
+    if x >= 1 << 4 {
+        x >>= 4;
+        pos += 4;
+    }
+    if x >= 1 << 2 {
+        x >>= 2;
+        pos += 2;
+    }
+    if x >= 1 << 1 {
+        pos += 1;
+    }
+
+    Some(pos)
+}
+
 pub struct Page {
     page_ptr: *mut c_page,
     order: u32,
 }
 
+#[allow(dead_code)]
 impl Page {
     pub fn alloc_one() -> Self {
         let page_ptr = unsafe { c_alloc_page() };
@@ -28,6 +76,16 @@ impl Page {
         Self { page_ptr, order }
     }
 
+    /// Allocate a contiguous block of pages that can contain at least `count` pages.
+    pub fn alloc_ceil(count: usize) -> Self {
+        assert_ne!(count, 0);
+        let count_msb = msb(count as u64) as usize;
+        let order = msb_position((count + count_msb - 1) as u64)
+            .expect("`count` is non-zero, so the position must exist");
+
+        Self::alloc_many(order)
+    }
+
     /// Get `Page` from `pfn`, acquiring the ownership of the page. `refcount` is not increased.
     ///
     /// # Safety
@@ -146,6 +204,7 @@ pub struct PageBuffer {
     offset: usize,
 }
 
+#[allow(dead_code)]
 impl PageBuffer {
     pub fn new(page: Page) -> Self {
         Self { page, offset: 0 }

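With the 0-based `msb_position` above, `alloc_ceil` computes `order = ceil(log2(count))`: adding `msb(count) - 1` rounds every non-power-of-two up past the next bit boundary before the position is taken. A reference model using `leading_zeros` instead of the hand-rolled bit tricks, with a few worked values:

    /// Reference model of the order computation in `Page::alloc_ceil`.
    fn order_for(count: u64) -> u32 {
        assert_ne!(count, 0);
        let count_msb = 1u64 << (63 - count.leading_zeros()); // msb(count)
        63 - (count + count_msb - 1).leading_zeros() // 0-based MSB position
    }

    fn main() {
        // `alloc_many(order)` hands back 2^order pages, so 2^order >= count.
        for (count, order) in [(1, 0), (2, 1), (3, 2), (4, 2), (5, 3), (8, 3), (9, 4)] {
            assert_eq!(order_for(count), order, "count = {count}");
            assert!(1u64 << order >= count);
        }
    }
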
+ 58 - 5
src/kernel/smp.rs

@@ -1,10 +1,63 @@
-mod percpu;
+use alloc::{format, sync::Arc};
+use arch::define_smp_bootstrap;
 
-pub use percpu::{alloc_percpu_area, set_percpu_area};
+use crate::{
+    kernel::{
+        cpu::current_cpu,
+        mem::{paging::Page, phys::PhysPtr as _},
+    },
+    println_debug,
+    sync::preempt,
+};
+
+use super::{
+    cpu::init_thiscpu,
+    task::{ProcessList, Scheduler, Thread},
+};
+
+define_smp_bootstrap!(4, ap_entry, {
+    let page = Page::alloc_many(9);
+    let stack_bottom = page.as_cached().as_ptr::<()>() as usize + page.len();
+    core::mem::forget(page);
+    stack_bottom
+});
+
+unsafe extern "C" fn ap_entry() {
+    init_thiscpu();
+    println_debug!("AP{} started", current_cpu().cpuid());
 
-pub unsafe fn bootstrap_smp() {
-    #[cfg(feature = "smp")]
     {
-        super::arch::init::bootstrap_cpus();
+        let idle_process = ProcessList::get()
+            .try_find_process(0)
+            .expect("Idle process must exist");
+
+        let idle_thread_name = format!("[kernel idle#AP{}]", current_cpu().cpuid());
+        let idle_thread =
+            Thread::new_for_init(Arc::from(idle_thread_name.as_bytes()), &idle_process);
+        ProcessList::get().add_thread(&idle_thread);
+        Scheduler::set_idle(idle_thread.clone());
+        Scheduler::set_current(idle_thread);
+    }
+
+    preempt::disable();
+    arch::enable_irqs();
+
+    // TODO!!!!!: Free the stack after having switched to idle task.
+
+    // TODO: Temporary: this throwaway context is saved into but never switched back to.
+    let mut unused_ctx = arch::TaskContext::new();
+    let mut unused_area = [0u8; 64];
+    unused_ctx.init(0, unused_area.as_mut_ptr() as usize);
+    unsafe {
+        arch::TaskContext::switch_to(
+            &mut unused_ctx, // We will never come back
+            &mut *Scheduler::idle_task().get_context_mut_ptr(),
+        );
     }
+    arch::freeze()
+}
+
+pub unsafe fn bootstrap_smp() {
+    current_cpu().bootstrap_cpus();
+    wait_cpus_online();
 }

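Each AP's bootstrap stack comes from `Page::alloc_many(9)`: 2^9 pages of 4 KiB, i.e. 2 MiB, with `stack_bottom` pointing one past the highest byte because x86 stacks grow downward. In numbers (the mapping address is invented):

    fn main() {
        const PAGE_SIZE: usize = 0x1000;
        let len = PAGE_SIZE << 9; // Page::alloc_many(9)
        assert_eq!(len, 2 * 1024 * 1024); // 2 MiB per AP

        let base = 0xffff_ff00_1000_0000usize; // invented mapping address
        let stack_bottom = base + len; // first push lands at stack_bottom - 8
        assert!(stack_bottom > base);
    }
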
+ 0 - 32
src/kernel/smp/percpu.rs

@@ -1,32 +0,0 @@
-use crate::kernel::mem::{paging::Page, phys::PhysPtr as _};
-
-/// # Safety
-/// The memory allocated by this function will never be freed and can only be used
-/// for per-cpu area.
-pub unsafe fn alloc_percpu_area() -> *mut () {
-    extern "C" {
-        static PERCPU_PAGES: usize;
-        fn _PERCPU_DATA_START();
-    }
-    assert_eq!(
-        unsafe { PERCPU_PAGES },
-        1,
-        "We support only 1 page per-cpu variables for now"
-    );
-
-    let page = Page::alloc_one();
-    unsafe {
-        page.as_cached()
-            .as_ptr::<u8>()
-            .copy_from_nonoverlapping(_PERCPU_DATA_START as *const _, page.len())
-    };
-
-    let addr = page.as_cached().as_ptr();
-    core::mem::forget(page);
-
-    addr
-}
-
-pub unsafe fn set_percpu_area(area: *mut ()) {
-    arch::set_percpu_area_thiscpu(area)
-}

+ 25 - 22
src/kernel/syscall.rs

@@ -1,8 +1,11 @@
 use crate::{
-    bindings::root::{interrupt_stack, mmx_registers},
+    bindings::root::mmx_registers,
     kernel::task::{ProcessList, Signal},
     println_warn,
 };
+use arch::InterruptContext;
+
+extern crate arch;
 
 mod file_rw;
 mod mm;
@@ -75,22 +78,22 @@ impl<'a, T: 'a> MapArgument<'a, *mut T> for MapArgumentImpl {
 
 macro_rules! arg_register {
     (0, $is:ident) => {
-        $is.regs.rbx
+        $is.rbx
     };
     (1, $is:ident) => {
-        $is.regs.rcx
+        $is.rcx
     };
     (2, $is:ident) => {
-        $is.regs.rdx
+        $is.rdx
     };
     (3, $is:ident) => {
-        $is.regs.rsi
+        $is.rsi
     };
     (4, $is:ident) => {
-        $is.regs.rdi
+        $is.rdi
     };
     (5, $is:ident) => {
-        $is.regs.rbp
+        $is.rbp
     };
 }
 
@@ -144,7 +147,7 @@ macro_rules! syscall32_call {
 
 macro_rules! define_syscall32 {
     ($name:ident, $handler:ident) => {
-        fn $name(_int_stack: &mut $crate::bindings::root::interrupt_stack,
+        fn $name(_int_stack: &mut $crate::kernel::syscall::arch::InterruptContext,
             _mmxregs: &mut $crate::bindings::root::mmx_registers) -> usize {
             use $crate::kernel::syscall::MapReturnValue;
 
@@ -156,7 +159,7 @@ macro_rules! define_syscall32 {
     };
     ($name:ident, $handler:ident, $($arg:ident: $argt:ty),*) => {
         fn $name(
-            int_stack: &mut $crate::bindings::root::interrupt_stack,
+            int_stack: &mut $crate::kernel::syscall::arch::InterruptContext,
             _mmxregs: &mut $crate::bindings::root::mmx_registers) -> usize {
             use $crate::kernel::syscall::syscall32_call;
 
@@ -180,13 +183,13 @@ use super::task::Thread;
 pub(self) use {arg_register, define_syscall32, format_expand, register_syscall, syscall32_call};
 
 pub(self) struct SyscallHandler {
-    handler: fn(&mut interrupt_stack, &mut mmx_registers) -> usize,
+    handler: fn(&mut InterruptContext, &mut mmx_registers) -> usize,
     name: &'static str,
 }
 
 pub(self) fn register_syscall_handler(
     no: usize,
-    handler: fn(&mut interrupt_stack, &mut mmx_registers) -> usize,
+    handler: fn(&mut InterruptContext, &mut mmx_registers) -> usize,
     name: &'static str,
 ) {
     // SAFETY: `SYSCALL_HANDLERS` is never modified after initialization.
@@ -210,7 +213,7 @@ const SYSCALL_HANDLERS_SIZE: usize = 404;
 static mut SYSCALL_HANDLERS: [Option<SyscallHandler>; SYSCALL_HANDLERS_SIZE] =
     [const { None }; SYSCALL_HANDLERS_SIZE];
 
-pub fn handle_syscall32(no: usize, int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) {
+pub fn handle_syscall32(no: usize, int_stack: &mut InterruptContext, mmxregs: &mut mmx_registers) {
     // SAFETY: `SYSCALL_HANDLERS` are never modified after initialization.
     let syscall = unsafe { SYSCALL_HANDLERS.get(no) }.and_then(Option::as_ref);
 
@@ -220,19 +223,19 @@ pub fn handle_syscall32(no: usize, int_stack: &mut interrupt_stack, mmxregs: &mu
             ProcessList::kill_current(Signal::SIGSYS);
         }
         Some(handler) => {
-            arch::interrupt::enable();
+            arch::enable_irqs();
             let retval = (handler.handler)(int_stack, mmxregs);
 
             // SAFETY: `int_stack` is always valid.
-            int_stack.regs.rax = retval as u64;
-            int_stack.regs.r8 = 0;
-            int_stack.regs.r9 = 0;
-            int_stack.regs.r10 = 0;
-            int_stack.regs.r11 = 0;
-            int_stack.regs.r12 = 0;
-            int_stack.regs.r13 = 0;
-            int_stack.regs.r14 = 0;
-            int_stack.regs.r15 = 0;
+            int_stack.rax = retval as u64;
+            int_stack.r8 = 0;
+            int_stack.r9 = 0;
+            int_stack.r10 = 0;
+            int_stack.r11 = 0;
+            int_stack.r12 = 0;
+            int_stack.r13 = 0;
+            int_stack.r14 = 0;
+            int_stack.r15 = 0;
         }
     }
 

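`arg_register!` pins down the i386 `int 0x80` convention — ebx, ecx, edx, esi, edi, ebp carry arguments 0 through 5 — now read straight off the flattened `InterruptContext` fields instead of the nested `regs` struct. A hypothetical use of the macros in this file (the syscall, its handler `do_nanosleep`, and `TimeSpec` are invented for illustration):

    // Expands to `fn sys_nanosleep(&mut InterruptContext, &mut mmx_registers) -> usize`
    // that decodes rbx/rcx via `arg_register!`/`MapArgument`, calls `do_nanosleep`,
    // and maps its `KResult` into an errno-style usize.
    define_syscall32!(sys_nanosleep, do_nanosleep, req: *const TimeSpec, rem: *mut TimeSpec);

    fn register() {
        // 0xa2 is nanosleep in the i386 table; registration shape as in this file.
        register_syscall_handler(0xa2, sys_nanosleep, "nanosleep");
    }
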
+ 21 - 64
src/kernel/syscall/procops.rs

@@ -1,8 +1,7 @@
-use core::arch::global_asm;
-
 use alloc::borrow::ToOwned;
 use alloc::ffi::CString;
-use bindings::{interrupt_stack, mmx_registers, EINVAL, ENOENT, ENOTDIR, ESRCH};
+use arch::InterruptContext;
+use bindings::{mmx_registers, EINVAL, ENOENT, ENOTDIR, ESRCH};
 use bitflags::bitflags;
 
 use crate::elf::ParsedElf32;
@@ -105,14 +104,14 @@ fn do_execve(exec: &[u8], argv: Vec<CString>, envp: Vec<CString>) -> KResult<(VA
     }
 }
 
-fn sys_execve(int_stack: &mut interrupt_stack, _mmxregs: &mut mmx_registers) -> usize {
+fn sys_execve(int_stack: &mut InterruptContext, _mmxregs: &mut mmx_registers) -> usize {
     match (|| -> KResult<()> {
-        let exec = int_stack.regs.rbx as *const u8;
+        let exec = int_stack.rbx as *const u8;
         let exec = UserString::new(exec)?;
 
         // TODO!!!!!: copy from user
-        let mut argv: UserPointer<u32> = UserPointer::new_vaddr(int_stack.regs.rcx as _)?;
-        let mut envp: UserPointer<u32> = UserPointer::new_vaddr(int_stack.regs.rdx as _)?;
+        let mut argv = UserPointer::<u32>::new_vaddr(int_stack.rcx as _)?;
+        let mut envp = UserPointer::<u32>::new_vaddr(int_stack.rdx as _)?;
 
         let mut argv_vec = Vec::new();
         let mut envp_vec = Vec::new();
@@ -141,8 +140,8 @@ fn sys_execve(int_stack: &mut interrupt_stack, _mmxregs: &mut mmx_registers) ->
 
         let (ip, sp) = do_execve(exec.as_cstr().to_bytes(), argv_vec, envp_vec)?;
 
-        int_stack.v_rip = ip.0;
-        int_stack.rsp = sp.0;
+        int_stack.rip = ip.0 as u64;
+        int_stack.rsp = sp.0 as u64;
         Ok(())
     })() {
         Ok(_) => 0,
@@ -272,7 +271,13 @@ fn do_set_thread_area(desc: *mut UserDescriptor) -> KResult<()> {
     Thread::current().set_thread_area(&mut desc)?;
     desc_pointer.write(desc)?;
 
-    Thread::current().load_thread_area32();
+    // SAFETY: Preemption is disabled on calling `load_thread_area32()`.
+    unsafe {
+        preempt::disable();
+        Thread::current().load_thread_area32();
+        preempt::enable();
+    }
+
     Ok(())
 }
 
@@ -446,65 +451,17 @@ define_syscall32!(sys_rt_sigprocmask, do_rt_sigprocmask,
 define_syscall32!(sys_rt_sigaction, do_rt_sigaction,
     signum: u32, act: *const UserSignalAction, oldact: *mut UserSignalAction, sigsetsize: usize);
 
-extern "C" {
-    fn ISR_stub_restore();
-    fn new_process_return();
-}
-
-unsafe extern "C" fn real_new_process_return() {
-    // We don't land on the typical `Scheduler::schedule()` function, so we need to
-    // manually enable preemption.
-    preempt::enable();
-}
-
-global_asm!(
-    r"
-        .globl new_process_return
-        new_process_return:
-            call {0}
-            jmp {1}
-    ",
-    sym real_new_process_return,
-    sym ISR_stub_restore,
-    options(att_syntax),
-);
-
-fn sys_fork(int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) -> usize {
+fn sys_fork(int_stack: &mut InterruptContext, _mmxregs: &mut mmx_registers) -> usize {
     let new_thread = Thread::new_cloned(&Thread::current());
-
-    // TODO: We should make the preparation of the kernel stack more abstract.
-    //       Currently, we can see that we are directly writing to the kernel stack,
-    //       which is platform dependent.
-    new_thread.prepare_kernel_stack(|kstack| {
-        let mut writer = kstack.get_writer();
-
-        // We make the child process return to `ISR_stub_restore`, pretending that we've
-        // just returned from a interrupt handler.
-        writer.entry = new_process_return;
-
-        let mut new_int_stack = int_stack.clone();
-
-        // Child's return value: 0
-        new_int_stack.regs.rax = 0;
-
-        writer.write(new_int_stack);
-
-        // In `ISR_stub_restore`, we will restore the mmx register context, followed by
-        // restoring the stack pointer by moving the value in `rbx` to `rsp`, which should
-        // point to the interrupt stack.
-        writer.rbx = writer.get_current_sp();
-
-        // Push the mmx register context to the stack.
-        writer.write(mmxregs.clone());
-
-        writer.finish();
-    });
-
+    let mut new_int_stack = int_stack.clone();
+    new_int_stack.rax = 0;
+    new_int_stack.eflags = 0x200;
+    new_thread.fork_init(new_int_stack);
     Scheduler::get().lock_irq().uwake(&new_thread);
     new_thread.process.pid as usize
 }
 
-fn sys_sigreturn(int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) -> usize {
+fn sys_sigreturn(int_stack: &mut InterruptContext, mmxregs: &mut mmx_registers) -> usize {
     let result = Thread::current().signal_list.restore(int_stack, mmxregs);
     match result {
         Ok(ret) => ret,

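In `sys_fork` above, the child's context is the parent's `InterruptContext` with two fixups: `rax = 0`, because fork returns zero in the child, and `eflags = 0x200`, which is exactly RFLAGS.IF — the child resumes with interrupts enabled and every other status flag cleared rather than inherited:

    /// RFLAGS bit 9 (IF): interrupt enable. 0x200 == 1 << 9.
    const EFLAGS_IF: u64 = 1 << 9;

    fn main() {
        assert_eq!(EFLAGS_IF, 0x200);
    }
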
+ 2 - 2
src/kernel/task.rs

@@ -8,6 +8,6 @@ pub(self) use kstack::KernelStack;
 pub use scheduler::Scheduler;
 pub use signal::{Signal, SignalAction};
 pub use thread::{
-    init_multitasking, Process, ProcessGroup, ProcessList, Session, Thread, ThreadState,
-    UserDescriptor, UserDescriptorFlags, WaitObject, WaitType,
+    init_multitasking, ProcessList, Session, Thread, ThreadState, UserDescriptor, WaitObject,
+    WaitType,
 };

+ 17 - 94
src/kernel/task/kstack.rs

@@ -1,89 +1,12 @@
 use crate::kernel::{
-    arch::user::{self},
+    cpu::current_cpu,
     mem::{paging::Page, phys::PhysPtr},
 };
-
-use core::cell::UnsafeCell;
+use arch::InterruptContext;
 
 pub struct KernelStack {
     pages: Page,
     bottom: usize,
-    sp: UnsafeCell<usize>,
-}
-
-pub struct KernelStackWriter<'lt> {
-    sp: &'lt mut usize,
-    prev_sp: usize,
-
-    pub entry: unsafe extern "C" fn(),
-    pub flags: usize,
-    pub r15: usize,
-    pub r14: usize,
-    pub r13: usize,
-    pub r12: usize,
-    pub rbp: usize,
-    pub rbx: usize,
-}
-
-unsafe extern "C" fn __not_assigned_entry() {
-    panic!("__not_assigned_entry called");
-}
-
-impl<'lt> KernelStackWriter<'lt> {
-    fn new(sp: &'lt mut usize) -> Self {
-        let prev_sp = *sp;
-
-        Self {
-            sp,
-            entry: __not_assigned_entry,
-            flags: 0,
-            r15: 0,
-            r14: 0,
-            r13: 0,
-            r12: 0,
-            rbp: 0,
-            rbx: 0,
-            prev_sp,
-        }
-    }
-
-    /// `data` and current sp should have an alignment of 16 bytes.
-    /// Otherwise, extra padding is added.
-    pub fn write<T: Copy>(&mut self, data: T) {
-        *self.sp -= core::mem::size_of::<T>();
-        *self.sp &= !0xf; // Align to 16 bytes
-
-        // SAFETY: `sp` is always valid.
-        unsafe {
-            (*self.sp as *mut T).write(data);
-        }
-    }
-
-    pub fn get_current_sp(&self) -> usize {
-        *self.sp
-    }
-
-    fn push(&mut self, val: usize) {
-        *self.sp -= core::mem::size_of::<usize>();
-
-        // SAFETY: `sp` is always valid.
-        unsafe {
-            (*self.sp as *mut usize).write(val);
-        }
-    }
-
-    pub fn finish(mut self) {
-        self.push(self.entry as usize);
-        self.push(self.flags); // rflags
-        self.push(self.r15); // r15
-        self.push(self.r14); // r14
-        self.push(self.r13); // r13
-        self.push(self.r12); // r12
-        self.push(self.rbp); // rbp
-        self.push(self.rbx); // rbx
-        self.push(0); // 0 for alignment
-        self.push(self.prev_sp) // previous sp
-    }
 }
 
 impl KernelStack {
@@ -95,26 +18,26 @@ impl KernelStack {
         let pages = Page::alloc_many(Self::KERNEL_STACK_ORDER);
         let bottom = pages.as_cached().offset(pages.len()).as_ptr::<u8>() as usize;
 
-        Self {
-            pages,
-            bottom,
-            sp: UnsafeCell::new(bottom),
-        }
+        Self { pages, bottom }
     }
 
-    pub fn load_interrupt_stack(&self) {
-        user::load_interrupt_stack(user::InterruptStack(self.bottom as u64));
+    /// # Safety
+    /// This function is unsafe because it accesses the `current_cpu()`, which needs
+    /// to be called in a preemption disabled context.
+    pub unsafe fn load_interrupt_stack(&self) {
+        arch::load_interrupt_stack(current_cpu(), self.bottom as u64);
     }
 
-    pub fn get_writer(&mut self) -> KernelStackWriter {
-        KernelStackWriter::new(self.sp.get_mut())
+    pub fn get_stack_bottom(&self) -> usize {
+        self.bottom
     }
 
-    /// Get a pointer to `self.sp` so we can use it in `context_switch()`.
-    ///
-    /// # Safety
-    /// Save the pointer somewhere or pass it to a function that will use it is UB.
-    pub unsafe fn get_sp_ptr(&self) -> *mut usize {
-        self.sp.get()
+    pub fn init(&self, interrupt_context: InterruptContext) -> usize {
+        let mut sp = self.bottom - core::mem::size_of::<InterruptContext>();
+        sp &= !0xf;
+        unsafe {
+            (sp as *mut InterruptContext).write(interrupt_context);
+        }
+        sp
     }
 }

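`KernelStack::init` parks a single `InterruptContext` at the top of the stack and rounds the resulting pointer down to 16 bytes; since the alignment happens after the subtraction, the context always stays below `bottom`. The math, with an invented stack bottom and context size:

    fn initial_sp(bottom: usize, context_size: usize) -> usize {
        (bottom - context_size) & !0xf // as in KernelStack::init
    }

    fn main() {
        let bottom = 0xffff_ff00_0020_0000usize; // invented, page-aligned
        let size = 0xc8; // invented InterruptContext size
        let sp = initial_sp(bottom, size);
        assert_eq!(sp % 16, 0);
        assert!(sp + size <= bottom); // the context fits inside the stack
    }
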
+ 14 - 10
src/kernel/task/scheduler.rs

@@ -59,13 +59,8 @@ impl Scheduler {
     }
 
     pub unsafe fn set_idle(thread: Arc<Thread>) {
-        thread.prepare_kernel_stack(|kstack| {
-            let mut writer = kstack.get_writer();
-            writer.flags = 0x200;
-            writer.entry = idle_task;
-            writer.finish();
-        });
         // We don't wake the idle thread, to prevent it from being scheduled accidentally.
+        thread.init(idle_task as *const () as usize);
 
         let old = IDLE_TASK.swap(NonNull::new(Arc::into_raw(thread) as *mut _));
         assert!(old.is_none(), "Idle task is already set");
@@ -187,7 +182,10 @@ impl Scheduler {
 
 fn context_switch_light(from: &Arc<Thread>, to: &Arc<Thread>) {
     unsafe {
-        arch::task::context_switch_light(from.get_sp_ptr(), to.get_sp_ptr());
+        arch::TaskContext::switch_to(
+            &mut *from.get_context_mut_ptr(),
+            &mut *to.get_context_mut_ptr(),
+        );
     }
 }
 
@@ -215,7 +213,7 @@ extern "C" fn idle_task() {
         // No thread to run, halt the cpu and rerun the loop.
         if scheduler.ready.is_empty() {
             drop(scheduler);
-            arch::task::halt();
+            arch::halt();
             continue;
         }
 
@@ -233,13 +231,19 @@ extern "C" fn idle_task() {
             Scheduler::set_current(next_thread);
         }
 
-        Thread::current().load_interrupt_stack();
-        Thread::current().load_thread_area32();
+        unsafe {
+            // SAFETY: We are in the idle task where preemption is disabled.
+            //         So we can safely load the thread area and interrupt stack.
+            Thread::current().load_interrupt_stack();
+            Thread::current().load_thread_area32();
+        }
 
         // TODO!!!: If the task comes from another cpu, we need to sync.
         //
         // The other cpu should see the changes of kernel stack of the target thread
         // made in this cpu.
+        //
+        // Can we find a better way than these `fence`s?
         fence(Ordering::SeqCst);
         context_switch_light(&Scheduler::idle_task(), &Thread::current());
         fence(Ordering::SeqCst);

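`context_switch_light` now funnels everything through `arch::TaskContext::switch_to`, added in `arch/src/x86_64/context.rs` (not shown in this range). One conventional shape for such a routine, assuming the saved `rsp` is the first field of `TaskContext` — a sketch of the technique, not this repo's actual code:

    use core::arch::naked_asm;

    #[repr(C)]
    pub struct TaskContext {
        rsp: u64, // offset 0; callee-saved registers live on the stack itself
    }

    #[naked]
    pub unsafe extern "C" fn switch_to(_from: &mut TaskContext, _to: &mut TaskContext) {
        // System V AMD64: rdi = from, rsi = to.
        naked_asm!(
            "
            push %rbp
            push %rbx
            push %r12
            push %r13
            push %r14
            push %r15
            pushfq
            mov %rsp, (%rdi)    # save the old kernel stack into from.rsp
            mov (%rsi), %rsp    # adopt the new task's kernel stack
            popfq
            pop %r15
            pop %r14
            pop %r13
            pop %r12
            pop %rbx
            pop %rbp
            ret
            ",
            options(att_syntax),
        );
    }
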
+ 16 - 14
src/kernel/task/signal.rs

@@ -10,10 +10,11 @@ use crate::{
 };
 
 use alloc::collections::{binary_heap::BinaryHeap, btree_map::BTreeMap};
+use arch::InterruptContext;
 use bindings::{
-    interrupt_stack, mmx_registers, EFAULT, EINVAL, SA_RESTORER, SIGABRT, SIGBUS, SIGCHLD, SIGCONT,
-    SIGFPE, SIGILL, SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP, SIGTTIN, SIGTTOU,
-    SIGURG, SIGWINCH, SIGXCPU, SIGXFSZ,
+    mmx_registers, EFAULT, EINVAL, SA_RESTORER, SIGABRT, SIGBUS, SIGCHLD, SIGCONT, SIGFPE, SIGILL,
+    SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG,
+    SIGWINCH, SIGXCPU, SIGXFSZ,
 };
 
 use super::{ProcessList, Thread};
@@ -21,6 +22,7 @@ use super::{ProcessList, Thread};
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Signal(u32);
 
+#[allow(dead_code)]
 impl Signal {
     pub const SIGHUP: Signal = Signal(1);
     pub const SIGINT: Signal = Signal(2);
@@ -168,14 +170,14 @@ impl SignalAction {
         &self,
         signum: u32,
         old_mask: u64,
-        int_stack: &mut interrupt_stack,
+        int_stack: &mut InterruptContext,
         mmxregs: &mut mmx_registers,
     ) -> KResult<()> {
         if self.sa_flags & SA_RESTORER as usize == 0 {
             return Err(EINVAL);
         }
 
-        const CONTEXT_SIZE: usize = size_of::<interrupt_stack>()
+        const CONTEXT_SIZE: usize = size_of::<InterruptContext>()
             + size_of::<mmx_registers>()
             + size_of::<usize>() // old_mask
             + size_of::<u32>(); // `sa_handler` argument: `signum`
@@ -183,9 +185,9 @@ impl SignalAction {
         // Save the current interrupt context 128 bytes below the user stack pointer
         // and align to 16 bytes. Then we push the return address of the restorer.
         // TODO!!!: Determine the size of the return address
-        let sp = ((int_stack.rsp - 128 - CONTEXT_SIZE) & !0xf) - size_of::<u32>();
+        let sp = ((int_stack.rsp as usize - 128 - CONTEXT_SIZE) & !0xf) - size_of::<u32>();
         let restorer_address: u32 = self.sa_restorer as u32;
-        let mut stack = UserBuffer::new(sp as *mut _, CONTEXT_SIZE + size_of::<u32>())?;
+        let mut stack = UserBuffer::new(sp as *mut u8, CONTEXT_SIZE + size_of::<u32>())?;
 
         stack.copy(&restorer_address)?.ok_or(EFAULT)?; // Restorer address
         stack.copy(&signum)?.ok_or(EFAULT)?; // Signal number
@@ -193,8 +195,8 @@ impl SignalAction {
         stack.copy(mmxregs)?.ok_or(EFAULT)?; // MMX registers
         stack.copy(int_stack)?.ok_or(EFAULT)?; // Interrupt stack
 
-        int_stack.v_rip = self.sa_handler;
-        int_stack.rsp = sp;
+        int_stack.rip = self.sa_handler as u64;
+        int_stack.rsp = sp as u64;
         Ok(())
     }
 }
@@ -333,7 +335,7 @@ impl SignalList {
     /// # Safety
     /// This function might never return. Caller must make sure that local variables
     /// that own resources are dropped before calling this function.
-    pub fn handle(&self, int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) {
+    pub fn handle(&self, int_stack: &mut InterruptContext, mmxregs: &mut mmx_registers) {
         loop {
             let signal = {
                 let signal = match self.inner.lock_irq().pop() {
@@ -396,18 +398,18 @@ impl SignalList {
     /// used to store the syscall return value to prevent the original value being clobbered.
     pub fn restore(
         &self,
-        int_stack: &mut interrupt_stack,
+        int_stack: &mut InterruptContext,
         mmxregs: &mut mmx_registers,
     ) -> KResult<usize> {
-        let old_mask_vaddr = int_stack.rsp;
+        let old_mask_vaddr = int_stack.rsp as usize;
         let old_mmxregs_vaddr = old_mask_vaddr + size_of::<usize>();
         let old_int_stack_vaddr = old_mmxregs_vaddr + size_of::<mmx_registers>();
 
         let old_mask = UserPointer::<u64>::new_vaddr(old_mask_vaddr)?.read()?;
         *mmxregs = UserPointer::<mmx_registers>::new_vaddr(old_mmxregs_vaddr)?.read()?;
-        *int_stack = UserPointer::<interrupt_stack>::new_vaddr(old_int_stack_vaddr)?.read()?;
+        *int_stack = UserPointer::<InterruptContext>::new_vaddr(old_int_stack_vaddr)?.read()?;
 
         self.inner.lock_irq().set_mask(old_mask);
-        Ok(int_stack.regs.rax as usize)
+        Ok(int_stack.rax as usize)
     }
 }

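Reading `handle` and `restore` together gives the user-stack signal frame. `UserBuffer::copy` appends upward from `sp`, and by the time `sys_sigreturn` runs, the restorer return address and `signum` have already been consumed, so `int_stack.rsp` lands on the old mask — exactly where `restore` starts reading. The implied layout:

    // Frame built at `sp` by SignalAction::handle (offsets in bytes):
    //   sp + 0x00  u32               restorer return address (sa_restorer)
    //   sp + 0x04  u32               signum, the sa_handler argument
    //   sp + 0x08  u64               old signal mask
    //   sp + 0x10  mmx_registers     saved MMX state
    //   sp + 0x10 + size_of::<mmx_registers>()
    //              InterruptContext  saved interrupt frame
    //
    // `restore` walks the same offsets starting from int_stack.rsp, which by
    // then points at the old mask.
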
+ 84 - 23
src/kernel/task/thread.rs

@@ -1,12 +1,13 @@
 use core::{
-    cell::RefCell,
+    arch::naked_asm,
+    cell::{RefCell, UnsafeCell},
     cmp,
     sync::atomic::{self, AtomicBool, AtomicU32},
 };
 
 use crate::{
     kernel::{
-        arch::user::TLS, mem::MMList, terminal::Terminal, user::dataflow::CheckedUserPointer,
+        cpu::current_cpu, mem::MMList, terminal::Terminal, user::dataflow::CheckedUserPointer,
         vfs::FsContext,
     },
     prelude::*,
@@ -27,6 +28,8 @@ use super::{
     KernelStack, Scheduler,
 };
 
+use arch::{InterruptContext, TaskContext, UserTLS};
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ThreadState {
     Preparing,
@@ -85,6 +88,7 @@ struct SessionInner {
     groups: BTreeMap<u32, Weak<ProcessGroup>>,
 }
 
+#[allow(dead_code)]
 #[derive(Debug)]
 pub struct Session {
     sid: u32,
@@ -93,6 +97,7 @@ pub struct Session {
     inner: Spin<SessionInner>,
 }
 
+#[allow(dead_code)]
 #[derive(Debug)]
 pub struct ProcessGroup {
     pgid: u32,
@@ -187,7 +192,7 @@ struct ThreadInner {
     name: Arc<[u8]>,
 
     /// Thread TLS
-    tls: Option<TLS>,
+    tls: Option<UserTLS>,
 
     /// User pointer
     /// Store child thread's tid when child thread returns to user space.
@@ -208,6 +213,9 @@ pub struct Thread {
 
     pub oncpu: AtomicBool,
 
+    /// Thread context
+    pub context: UnsafeCell<TaskContext>,
+
     /// Kernel stack
     /// Never access this directly.
     ///
@@ -770,6 +778,7 @@ impl Process {
     }
 }
 
+#[allow(dead_code)]
 impl UserDescriptorFlags {
     fn is_32bit_segment(&self) -> bool {
         self.0 & 0b1 != 0
@@ -805,6 +814,7 @@ impl Thread {
             fs_context: FsContext::new_for_init(),
             signal_list: SignalList::new(),
             kstack: RefCell::new(KernelStack::new()),
+            context: UnsafeCell::new(TaskContext::new()),
             state: Spin::new(ThreadState::Preparing),
             oncpu: AtomicBool::new(false),
             inner: Spin::new(ThreadInner {
@@ -835,6 +845,7 @@ impl Thread {
             fs_context: FsContext::new_cloned(&other.fs_context),
             signal_list,
             kstack: RefCell::new(KernelStack::new()),
+            context: UnsafeCell::new(TaskContext::new()),
             state: Spin::new(ThreadState::Preparing),
             oncpu: AtomicBool::new(false),
             inner: Spin::new(ThreadInner {
@@ -893,9 +904,13 @@ impl Thread {
         }
     }
 
-    pub fn load_thread_area32(&self) {
+    /// # Safety
+    /// This function is unsafe because it accesses the `current_cpu()`, which needs
+    /// to be called in a preemption disabled context.
+    pub unsafe fn load_thread_area32(&self) {
         if let Some(tls) = self.inner.lock().tls.as_ref() {
-            tls.load();
+            // SAFETY: Preemption is disabled.
+            tls.load(current_cpu());
         }
     }
 
@@ -918,37 +933,49 @@ impl Thread {
             return Ok(());
         }
 
-        let (tls, entry) = TLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages());
+        let (tls, entry) = UserTLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages());
         desc.entry = entry;
         inner.tls = Some(tls);
         Ok(())
     }
 
-    /// This function is used to prepare the kernel stack for the thread in `Preparing` state.
-    ///
-    /// # Safety
-    /// Calling this function on a thread that is not in `Preparing` state will panic.
-    pub fn prepare_kernel_stack<F: FnOnce(&mut KernelStack)>(&self, func: F) {
+    pub fn fork_init(&self, interrupt_context: InterruptContext) {
         let mut state = self.state.lock();
-        assert!(matches!(*state, ThreadState::Preparing));
+        *state = ThreadState::USleep;
 
-        // SAFETY: We are in the preparing state with `state` locked.
-        func(&mut self.kstack.borrow_mut());
+        let sp = self.kstack.borrow().init(interrupt_context);
+        unsafe {
+            self.get_context_mut_ptr()
+                .as_mut()
+                .unwrap()
+                .init(fork_return as usize, sp);
+        }
+    }
 
-        // Enter USleep state. Await for the thread to be scheduled manually.
+    pub fn init(&self, entry: usize) {
+        let mut state = self.state.lock();
         *state = ThreadState::USleep;
+        unsafe {
+            self.get_context_mut_ptr()
+                .as_mut()
+                .unwrap()
+                .init(entry, self.get_kstack_bottom());
+        }
     }
 
-    pub fn load_interrupt_stack(&self) {
+    /// # Safety
+    /// This function is unsafe because it accesses the `current_cpu()`, which needs
+    /// to be called in a preemption disabled context.
+    pub unsafe fn load_interrupt_stack(&self) {
         self.kstack.borrow().load_interrupt_stack();
     }
 
-    /// Get a pointer to `self.sp` so we can use it in `context_switch()`.
-    ///
-    /// # Safety
-    /// Save the pointer somewhere or pass it to a function that will use it is UB.
-    pub unsafe fn get_sp_ptr(&self) -> *mut usize {
-        self.kstack.borrow().get_sp_ptr()
+    pub fn get_kstack_bottom(&self) -> usize {
+        self.kstack.borrow().get_stack_bottom()
+    }
+
+    pub unsafe fn get_context_mut_ptr(&self) -> *mut TaskContext {
+        self.context.get()
     }
 
     pub fn set_name(&self, name: Arc<[u8]>) {
@@ -960,6 +987,37 @@ impl Thread {
     }
 }
 
+#[naked]
+unsafe extern "C" fn fork_return() {
+    // We don't land on the typical `Scheduler::schedule()` function, so we need to
+    // manually enable preemption.
+    naked_asm! {
+        "
+        call {preempt_enable}
+        swapgs
+        pop %rax
+        pop %rbx
+        pop %rcx
+        pop %rdx
+        pop %rdi
+        pop %rsi
+        pop %r8
+        pop %r9
+        pop %r10
+        pop %r11
+        pop %r12
+        pop %r13
+        pop %r14
+        pop %r15
+        pop %rbp
+        add $16, %rsp
+        iretq
+        ",
+        preempt_enable = sym preempt::enable,
+        options(att_syntax),
+    }
+}
+
 // TODO: Maybe we can find a better way instead of using `RefCell` for `KernelStack`?
 unsafe impl Sync for Thread {}
 
@@ -1088,6 +1146,9 @@ pub fn init_multitasking() {
     // Lazy init
     assert!(ProcessList::get().try_find_thread(1).is_some());
 
-    Thread::current().load_interrupt_stack();
+    unsafe {
+        // SAFETY: Preemption is disabled outside this function.
+        Thread::current().load_interrupt_stack();
+    }
     Thread::current().process.mm_list.switch_page_table();
 }

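`fork_return` only works if its pop sequence mirrors `InterruptContext` field for field: fifteen GPRs in declaration order, then `add $16, %rsp` to skip what are presumably the interrupt number and error code, leaving the five-word `iretq` frame on top. The layout the asm implies — reconstructed from the pops, not verified against `arch/src/x86_64/interrupt.s`:

    // Assumed layout; field order inferred from fork_return's pops.
    #[repr(C)]
    struct InterruptContext {
        rax: u64, rbx: u64, rcx: u64, rdx: u64,
        rdi: u64, rsi: u64,
        r8: u64, r9: u64, r10: u64, r11: u64,
        r12: u64, r13: u64, r14: u64, r15: u64,
        rbp: u64,
        int_no: u64,     // skipped by `add $16, %rsp`
        error_code: u64, // skipped by `add $16, %rsp`
        rip: u64, cs: u64, eflags: u64, rsp: u64, ss: u64, // iretq frame
    }
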
+ 2 - 2
src/kernel/timer.rs

@@ -1,8 +1,8 @@
 use core::sync::atomic::{AtomicUsize, Ordering};
 
-use crate::{println_debug, sync::preempt};
+use crate::sync::preempt;
 
-use super::{arch::interrupt::end_of_interrupt, task::Scheduler};
+use super::{interrupt::end_of_interrupt, task::Scheduler};
 
 static TICKS: AtomicUsize = AtomicUsize::new(0);
 

+ 25 - 22
src/lib.rs

@@ -5,6 +5,8 @@
 #![feature(arbitrary_self_types)]
 #![feature(get_mut_unchecked)]
 #![feature(macro_metavar_expr)]
+#![feature(naked_functions)]
+
 extern crate alloc;
 
 #[allow(warnings)]
@@ -29,10 +31,7 @@ use core::{
 };
 use elf::ParsedElf32;
 use kernel::{
-    mem::{
-        paging::Page,
-        phys::{CachedPP, PhysPtr as _},
-    },
+    cpu::init_thiscpu,
     task::{init_multitasking, Scheduler, Thread},
     vfs::{
         dentry::Dentry,
@@ -60,7 +59,7 @@ fn panic(info: &core::panic::PanicInfo) -> ! {
     println_fatal!();
     println_fatal!("{}", info.message());
 
-    arch::task::freeze()
+    arch::freeze()
 }
 
 extern "C" {
@@ -112,7 +111,7 @@ extern "C" {
 pub extern "C" fn rust_kinit(early_kstack_pfn: usize) -> ! {
     // We don't call global constructors.
     // Rust doesn't need that, and we're not going to use global variables in C++.
-    unsafe { kernel::arch::init::init_cpu() };
+    unsafe { init_thiscpu() };
 
     kernel::interrupt::init().unwrap();
 
@@ -121,33 +120,37 @@ pub extern "C" fn rust_kinit(early_kstack_pfn: usize) -> ! {
 
     kernel::vfs::mount::init_vfs().unwrap();
 
+    // To satisfy the `Scheduler` "preempt count == 0" assertion.
+    preempt::disable();
+
     // We need root dentry to be present in constructor of `FsContext`.
     // So call `init_vfs` first, then `init_multitasking`.
     init_multitasking();
-    Thread::current().prepare_kernel_stack(|kstack| {
-        let mut writer = kstack.get_writer();
-        writer.entry = to_init_process;
-        writer.flags = 0x200;
-        writer.rbp = 0;
-        writer.rbx = early_kstack_pfn; // `to_init_process` arg
-        writer.finish();
-    });
 
-    // To satisfy the `Scheduler` "preempt count == 0" assertion.
-    preempt::disable();
+    Thread::current().init(init_process as usize);
 
     Scheduler::get().lock().uwake(&Thread::current());
 
-    arch::task::context_switch_light(
-        CachedPP::new(early_kstack_pfn).as_ptr(), // We will never come back
-        unsafe { Scheduler::idle_task().get_sp_ptr() },
+    let mut unused_ctx = arch::TaskContext::new();
+    // TODO: Temporary: this throwaway context is saved into but never switched back to.
+    unused_ctx.init(
+        to_init_process as usize,
+        // Presumably one page above the early stack's base, via the cached direct map.
+        early_kstack_pfn + 0x1000 + 0xffffff0000000000,
     );
-    arch::task::freeze()
+    unsafe {
+        arch::TaskContext::switch_to(
+            &mut unused_ctx, // We will never come back
+            &mut *Scheduler::idle_task().get_context_mut_ptr(),
+        );
+    }
+
+    arch::freeze()
 }
 
 /// We enter this function with `preempt count == 0`
-extern "C" fn init_process(early_kstack_pfn: usize) {
-    unsafe { Page::take_pfn(early_kstack_pfn, 9) };
+extern "C" fn init_process(/* early_kstack_pfn: usize */) {
+    // TODO!!!: Pass `early_kstack_pfn` back in and free it here.
+    // unsafe { Page::take_pfn(early_kstack_pfn, 9) };
     preempt::enable();
 
     kernel::syscall::register_syscalls();

+ 2 - 0
src/sync/semaphore.rs

@@ -2,6 +2,7 @@ use super::{strategy::LockStrategy, Spin, UCondVar};
 
 pub struct SemaphoreStrategy<const MAX: usize = { core::usize::MAX }>;
 
+#[allow(dead_code)]
 impl<const MAX: usize> SemaphoreStrategy<MAX> {
     #[inline(always)]
     fn is_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {
@@ -62,6 +63,7 @@ unsafe impl<const MAX: usize> LockStrategy for SemaphoreStrategy<MAX> {
 
 pub struct RwSemaphoreStrategy<const READ_MAX: isize = { core::isize::MAX }>;
 
+#[allow(dead_code)]
 impl<const READ_MAX: isize> RwSemaphoreStrategy<READ_MAX> {
     #[inline(always)]
     fn is_read_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {