
feat(hal): smp initialization

greatbridf 8 months ago
parent
commit
dd32b93963
78 changed files with 1636 additions and 988 deletions
  1. +0 -1  .vscode/settings.json
  2. +52 -25  Cargo.lock
  3. +1 -1  Cargo.toml
  4. +0 -17  arch/src/x86_64/io.rs
  5. +0 -18  arch/src/x86_64/mod.rs
  6. +0 -89  arch/src/x86_64/percpu.rs
  7. +0 -54  arch/src/x86_64/user.rs
  8. +2 -0  crates/eonix_hal/Cargo.toml
  9. +130 -5  crates/eonix_hal/eonix_hal_macros/src/lib.rs
  10. +1 -0  crates/eonix_hal/eonix_hal_traits/Cargo.toml
  11. +2 -0  crates/eonix_hal/eonix_hal_traits/src/lib.rs
  12. +6 -0  crates/eonix_hal/eonix_hal_traits/src/mm.rs
  13. +7 -0  crates/eonix_hal/eonix_hal_traits/src/processor.rs
  14. +5 -0  crates/eonix_hal/eonix_hal_traits/src/trap.rs
  15. +12 -22  crates/eonix_hal/src/arch/x86_64/bootstrap.rs
  16. +374 -0  crates/eonix_hal/src/arch/x86_64/bootstrap/init.rs
  17. +115 -37  crates/eonix_hal/src/arch/x86_64/cpu.rs
  18. +28 -23  crates/eonix_hal/src/arch/x86_64/gdt.rs
  19. +18 -54  crates/eonix_hal/src/arch/x86_64/interrupt.rs
  20. +5 -3  crates/eonix_hal/src/arch/x86_64/link.x
  21. +3 -4  crates/eonix_hal/src/arch/x86_64/memory.x
  22. +174 -10  crates/eonix_hal/src/arch/x86_64/mm.rs
  23. +3 -0  crates/eonix_hal/src/arch/x86_64/mod.rs
  24. +56 -10  crates/eonix_hal/src/arch/x86_64/trap.rs
  25. +22 -0  crates/eonix_hal/src/bootstrap.rs
  26. +3 -1  crates/eonix_hal/src/lib.rs
  27. +198 -1  crates/eonix_hal/src/mm.rs
  28. +1 -1  crates/eonix_hal/src/processor.rs
  29. +1 -4  crates/eonix_hal/src/trap.rs
  30. +1 -1  crates/eonix_mm/src/address.rs
  31. +24 -0  crates/eonix_mm/src/address/paddr.rs
  32. +46 -18  crates/eonix_mm/src/page_table/page_table.rs
  33. +1 -5  crates/eonix_mm/src/page_table/paging_mode.rs
  34. +1 -1  crates/eonix_mm/src/paging.rs
  35. +14 -1  crates/eonix_mm/src/paging/page.rs
  36. +6 -6  crates/eonix_mm/src/paging/raw_page.rs
  37. +1 -1  crates/eonix_percpu/eonix_percpu_macros/src/lib.rs
  38. +72 -0  crates/eonix_percpu/src/lib.rs
  39. +41 -10  crates/eonix_preempt/src/lib.rs
  40. +1 -0  crates/eonix_runtime/src/scheduler.rs
  41. +6 -7  crates/eonix_sync/Cargo.toml
  42. +8 -0  crates/eonix_sync/eonix_spin/Cargo.toml
  43. +1 -1  crates/eonix_sync/eonix_spin/src/guard.rs
  44. +11 -4  crates/eonix_sync/eonix_spin/src/lib.rs
  45. +10 -0  crates/eonix_sync/eonix_sync_base/Cargo.toml
  46. +0 -0  crates/eonix_sync/eonix_sync_base/src/guard.rs
  47. +0 -0  crates/eonix_sync/eonix_sync_base/src/lazy_lock.rs
  48. +13 -0  crates/eonix_sync/eonix_sync_base/src/lib.rs
  49. +0 -0  crates/eonix_sync/eonix_sync_base/src/locked.rs
  50. +0 -0  crates/eonix_sync/eonix_sync_base/src/locked/proof.rs
  51. +0 -0  crates/eonix_sync/eonix_sync_base/src/marker.rs
  52. +0 -0  crates/eonix_sync/eonix_sync_base/src/relax.rs
  53. +13 -0  crates/eonix_sync/eonix_sync_rt/Cargo.toml
  54. +11 -0  crates/eonix_sync/eonix_sync_rt/src/lib.rs
  55. +0 -0  crates/eonix_sync/eonix_sync_rt/src/mutex.rs
  56. +1 -1  crates/eonix_sync/eonix_sync_rt/src/mutex/guard.rs
  57. +0 -0  crates/eonix_sync/eonix_sync_rt/src/rwlock.rs
  58. +1 -1  crates/eonix_sync/eonix_sync_rt/src/rwlock/guard.rs
  59. +7 -4  crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs
  60. +3 -1  crates/eonix_sync/eonix_sync_rt/src/wait_list.rs
  61. +0 -0  crates/eonix_sync/eonix_sync_rt/src/wait_list/wait_handle.rs
  62. +2 -1  crates/eonix_sync/eonix_sync_rt/src/wait_list/wait_object.rs
  63. +3 -21  crates/eonix_sync/src/lib.rs
  64. +10 -12  doc/mem_layout.txt
  65. +0 -4  src/kernel.rs
  66. +0 -104  src/kernel/async/lock.cc
  67. +0 -46  src/kernel/cpu.rs
  68. +5 -27  src/kernel/interrupt.rs
  69. +1 -1  src/kernel/mem.rs
  70. +4 -21  src/kernel/mem/access.rs
  71. +13 -16  src/kernel/mem/mm_list.rs
  72. +0 -25  src/kernel/mem/page_alloc.rs
  73. +1 -2  src/kernel/mem/paging.rs
  74. +0 -43  src/kernel/smp.rs
  75. +9 -7  src/kernel/task/thread.rs
  76. +29 -185  src/kernel_init.rs
  77. +47 -10  src/lib.rs
  78. +0 -21  src/types/libstdcpp.cpp

+ 0 - 1
.vscode/settings.json

@@ -1,4 +1,3 @@
 {
     "makefile.configureOnOpen": false,
-    "rust-analyzer.check.allTargets": false,
 }

+ 52 - 25
Cargo.lock

@@ -73,35 +73,11 @@ version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
 
-[[package]]
-name = "eonix-kernel"
-version = "0.1.0"
-dependencies = [
- "acpi",
- "arch",
- "atomic_unique_refcell",
- "bitflags",
- "buddy_allocator",
- "eonix_hal",
- "eonix_log",
- "eonix_macros",
- "eonix_mm",
- "eonix_percpu",
- "eonix_preempt",
- "eonix_runtime",
- "eonix_sync",
- "intrusive-collections",
- "intrusive_list",
- "itertools",
- "pointers",
- "posix_types",
- "slab_allocator",
-]
-
 [[package]]
 name = "eonix_hal"
 version = "0.1.0"
 dependencies = [
+ "acpi",
  "arch",
  "cfg-if",
  "eonix_hal_macros",
@@ -109,6 +85,7 @@ dependencies = [
  "eonix_mm",
  "eonix_percpu",
  "eonix_preempt",
+ "eonix_sync_base",
 ]
 
 [[package]]
@@ -125,6 +102,32 @@ name = "eonix_hal_traits"
 version = "0.1.0"
 dependencies = [
  "bitflags",
+ "eonix_mm",
+]
+
+[[package]]
+name = "eonix_kernel"
+version = "0.1.0"
+dependencies = [
+ "acpi",
+ "arch",
+ "atomic_unique_refcell",
+ "bitflags",
+ "buddy_allocator",
+ "eonix_hal",
+ "eonix_log",
+ "eonix_macros",
+ "eonix_mm",
+ "eonix_percpu",
+ "eonix_preempt",
+ "eonix_runtime",
+ "eonix_sync",
+ "intrusive-collections",
+ "intrusive_list",
+ "itertools",
+ "pointers",
+ "posix_types",
+ "slab_allocator",
 ]
 
 [[package]]
@@ -189,12 +192,36 @@ dependencies = [
  "pointers",
 ]
 
+[[package]]
+name = "eonix_spin"
+version = "0.1.0"
+dependencies = [
+ "eonix_preempt",
+ "eonix_sync_base",
+]
+
 [[package]]
 name = "eonix_sync"
 version = "0.1.0"
+dependencies = [
+ "eonix_spin",
+ "eonix_sync_base",
+ "eonix_sync_rt",
+]
+
+[[package]]
+name = "eonix_sync_base"
+version = "0.1.0"
+
+[[package]]
+name = "eonix_sync_rt"
+version = "0.1.0"
 dependencies = [
  "arch",
+ "eonix_hal",
  "eonix_preempt",
+ "eonix_spin",
+ "eonix_sync_base",
  "intrusive-collections",
 ]
 

+ 1 - 1
Cargo.toml

@@ -1,5 +1,5 @@
 [package]
-name = "eonix-kernel"
+name = "eonix_kernel"
 version = "0.1.0"
 edition = "2021"
 

+ 0 - 17
arch/src/x86_64/io.rs

@@ -1,22 +1,5 @@
 use core::arch::asm;
 
-pub fn enable_sse() {
-    unsafe {
-        asm!(
-            "mov %cr0, %rax",
-            "and $(~0xc), %rax",
-            "or $0x22, %rax",
-            "mov %rax, %cr0",
-            "mov %cr4, %rax",
-            "or $0x600, %rax",
-            "mov %rax, %cr4",
-            "fninit",
-            out("rax") _,
-            options(att_syntax, nomem, nostack)
-        )
-    }
-}
-
 pub fn inb(no: u16) -> u8 {
     let data;
     unsafe {

+ 0 - 18
arch/src/x86_64/mod.rs

@@ -1,24 +1,14 @@
 mod fence;
 mod fpu;
-mod gdt;
-mod init;
-mod interrupt;
 mod io;
-mod percpu;
-mod user;
 
 use core::arch::asm;
 use eonix_mm::address::{Addr as _, PAddr, VAddr};
 use eonix_mm::paging::PFN;
 
-pub use self::gdt::*;
-pub use self::init::*;
-pub use self::interrupt::*;
 pub use self::io::*;
-pub use self::user::*;
 pub use fence::*;
 pub use fpu::*;
-pub use percpu::*;
 
 #[inline(always)]
 pub fn flush_tlb(vaddr: usize) {
@@ -94,14 +84,6 @@ pub fn pause() {
     }
 }
 
-#[inline(always)]
-pub fn freeze() -> ! {
-    loop {
-        interrupt::disable_irqs();
-        halt();
-    }
-}
-
 #[inline(always)]
 pub fn rdmsr(msr: u32) -> u64 {
     let edx: u32;

+ 0 - 89
arch/src/x86_64/percpu.rs

@@ -1,89 +0,0 @@
-use super::wrmsr;
-use core::{
-    alloc::Layout,
-    arch::asm,
-    cell::UnsafeCell,
-    ptr::{null_mut, NonNull},
-    sync::atomic::{AtomicPtr, Ordering},
-};
-use eonix_mm::paging::PAGE_SIZE;
-
-pub const MAX_CPUS: usize = 256;
-
-#[repr(align(4096))]
-struct PercpuData(UnsafeCell<()>); // Not `Sync`.
-
-pub struct PercpuArea {
-    data: NonNull<PercpuData>,
-}
-
-static PERCPU_POINTERS: [AtomicPtr<PercpuData>; MAX_CPUS] =
-    [const { AtomicPtr::new(null_mut()) }; MAX_CPUS];
-
-impl PercpuArea {
-    fn len() -> usize {
-        extern "C" {
-            fn PERCPU_LENGTH();
-        }
-        let len = PERCPU_LENGTH as usize;
-
-        assert_ne!(len, 0, "Percpu length should not be zero.");
-        len
-    }
-
-    fn page_count() -> usize {
-        Self::len().div_ceil(PAGE_SIZE)
-    }
-
-    fn data_start() -> NonNull<u8> {
-        extern "C" {
-            fn PERCPU_DATA_START();
-        }
-
-        let addr = PERCPU_DATA_START as usize;
-        NonNull::new(addr as *mut _).expect("Percpu data should not be null.")
-    }
-
-    fn layout() -> Layout {
-        Layout::from_size_align(Self::page_count() * PAGE_SIZE, PAGE_SIZE).expect("Invalid layout.")
-    }
-
-    pub fn new<F>(allocate: F) -> Self
-    where
-        F: FnOnce(Layout) -> NonNull<u8>,
-    {
-        let data_pointer = allocate(Self::layout());
-
-        unsafe {
-            // SAFETY: The `data_pointer` is of valid length and properly aligned.
-            data_pointer.copy_from_nonoverlapping(Self::data_start(), Self::len());
-        }
-
-        Self {
-            data: data_pointer.cast(),
-        }
-    }
-
-    /// Set up the percpu area for the current CPU.
-    pub fn setup(&self) {
-        wrmsr(0xC0000101, self.data.as_ptr() as u64);
-
-        unsafe {
-            // SAFETY: %gs:0 points to the start of the percpu area.
-            asm!(
-                "movq {}, %gs:0",
-                in(reg) self.data.as_ptr(),
-                options(nostack, preserves_flags, att_syntax)
-            );
-        }
-    }
-
-    pub fn register(self: Self, cpuid: usize) {
-        PERCPU_POINTERS[cpuid].store(self.data.as_ptr(), Ordering::Release);
-    }
-
-    pub fn get_for(cpuid: usize) -> Option<NonNull<()>> {
-        let pointer = PERCPU_POINTERS[cpuid].load(Ordering::Acquire);
-        NonNull::new(pointer.cast())
-    }
-}

+ 0 - 54
arch/src/x86_64/user.rs

@@ -1,54 +0,0 @@
-use core::pin::Pin;
-
-use super::{CPU, GDTEntry};
-
-#[derive(Debug, Clone)]
-pub enum UserTLS {
-    /// TODO: This is not used yet.
-    #[allow(dead_code)]
-    TLS64(u64),
-    TLS32 {
-        base: u64,
-        desc: GDTEntry,
-    },
-}
-
-impl UserTLS {
-    /// # Return
-    /// Returns the TLS descriptor and the index of the TLS segment.
-    pub fn new32(base: u32, limit: u32, is_limit_in_pages: bool) -> (Self, u32) {
-        let flags = if is_limit_in_pages { 0xc } else { 0x4 };
-
-        (
-            Self::TLS32 {
-                base: base as u64,
-                desc: GDTEntry::new(base, limit, 0xf2, flags),
-            },
-            7,
-        )
-    }
-
-    pub fn load(&self, cpu_status: Pin<&mut CPU>) {
-        match self {
-            Self::TLS64(base) => {
-                const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
-                super::wrmsr(IA32_KERNEL_GS_BASE, *base);
-            }
-            Self::TLS32 { base, desc } => {
-                unsafe {
-                    // SAFETY: We don't move the CPUStatus object.
-                    let cpu_mut = cpu_status.get_unchecked_mut();
-                    cpu_mut.set_tls32(*desc);
-                }
-
-                const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
-                super::wrmsr(IA32_KERNEL_GS_BASE, *base);
-            }
-        }
-    }
-}
-
-pub unsafe fn load_interrupt_stack(cpu_status: Pin<&mut CPU>, stack: u64) {
-    // SAFETY: We don't move the CPUStatus object.
-    cpu_status.get_unchecked_mut().set_rsp0(stack);
-}

+ 2 - 0
crates/eonix_hal/Cargo.toml

@@ -10,7 +10,9 @@ eonix_hal_macros = { path = "./eonix_hal_macros" }
 
 arch = { path = "../../arch" }
 eonix_mm = { path = "../eonix_mm" }
+eonix_sync_base = { path = "../eonix_sync/eonix_sync_base" }
 eonix_percpu = { path = "../eonix_percpu" }
 eonix_preempt = { path = "../eonix_preempt" }
 
+acpi = "5.2.0"
 cfg-if = "1.0"

+ 130 - 5
crates/eonix_hal/eonix_hal_macros/src/lib.rs

@@ -2,7 +2,7 @@ extern crate proc_macro;
 
 use proc_macro::TokenStream;
 use quote::quote;
-use syn::{parse_macro_input, ItemFn};
+use syn::{parse, parse_macro_input, spanned::Spanned as _, FnArg, ItemFn};
 
 /// Define the default trap handler. The function should take exactly one argument
 /// of type `&mut TrapContext`.
@@ -17,14 +17,24 @@ use syn::{parse_macro_input, ItemFn};
 /// ```
 #[proc_macro_attribute]
 pub fn default_trap_handler(attrs: TokenStream, item: TokenStream) -> TokenStream {
+    let item = parse_macro_input!(item as ItemFn);
+
     if !attrs.is_empty() {
-        panic!("`default_trap_handler` attribute does not take any arguments");
+        return parse::Error::new(
+            item.span(),
+            "`default_trap_handler` attribute does not take any arguments",
+        )
+        .into_compile_error()
+        .into();
     }
 
-    let item = parse_macro_input!(item as ItemFn);
-
     if item.sig.inputs.len() > 1 {
-        panic!("`default_trap_handler` only takes one argument");
+        return parse::Error::new(
+            item.span(),
+            "`default_trap_handler` only takes one argument",
+        )
+        .into_compile_error()
+        .into();
     }
 
     let attrs = &item.attrs;
@@ -38,3 +48,118 @@ pub fn default_trap_handler(attrs: TokenStream, item: TokenStream) -> TokenStream {
     }
     .into()
 }
+
+/// Define the entry point. The function should have signature like
+///
+/// ```ignore
+/// [unsafe] fn ident(ident: eonix_hal::bootstrap::BootStrapData) -> !
+/// ```
+///
+/// # Usage
+/// ```no_run
+/// #[eonix_hal::main]
+/// fn kernel_main(data: eonix_hal::bootstrap::BootStrapData) -> ! {
+///     // ...
+/// }
+/// ```
+#[proc_macro_attribute]
+pub fn main(attrs: TokenStream, item: TokenStream) -> TokenStream {
+    let item = parse_macro_input!(item as ItemFn);
+
+    if !attrs.is_empty() {
+        return parse::Error::new(item.span(), "`main` attribute does not take any arguments")
+            .into_compile_error()
+            .into();
+    }
+
+    if item.sig.inputs.len() != 1 {
+        return parse::Error::new(item.span(), "`main` should have exactly one argument.")
+            .into_compile_error()
+            .into();
+    }
+
+    let arg_ident = match item.sig.inputs.first().unwrap() {
+        FnArg::Receiver(_) => {
+            return parse::Error::new(
+                item.span(),
+                "`main` function cannot take `self` as an argument",
+            )
+            .into_compile_error()
+            .into();
+        }
+        FnArg::Typed(ty) => &ty.pat,
+    };
+
+    let ident = &item.sig.ident;
+    let attrs = item.attrs;
+    let unsafety = item.sig.unsafety;
+    let block = &item.block;
+
+    quote! {
+        #(#attrs)*
+        #[export_name = "_eonix_hal_main"]
+        pub #unsafety fn #ident(
+            #arg_ident: eonix_hal::bootstrap::BootStrapData,
+        ) -> ! #block
+    }
+    .into()
+}
+
+/// Define the AP entry point. The function should have signature like
+///
+/// ```ignore
+/// [unsafe] fn ident(ident: eonix_mm::address::PRange) -> !
+/// ```
+///
+/// # Usage
+/// ```no_run
+/// #[eonix_hal::main]
+/// fn ap_main(stack_range: eonix_mm::address::PRange) -> ! {
+///     // ...
+/// }
+/// ```
+#[proc_macro_attribute]
+pub fn ap_main(attrs: TokenStream, item: TokenStream) -> TokenStream {
+    let item = parse_macro_input!(item as ItemFn);
+
+    if !attrs.is_empty() {
+        return parse::Error::new(
+            item.span(),
+            "`ap_main` attribute does not take any arguments",
+        )
+        .into_compile_error()
+        .into();
+    }
+
+    if item.sig.inputs.len() != 1 {
+        return parse::Error::new(item.span(), "`ap_main` should have exactly one argument.")
+            .into_compile_error()
+            .into();
+    }
+
+    let arg_ident = match item.sig.inputs.first().unwrap() {
+        FnArg::Receiver(_) => {
+            return parse::Error::new(
+                item.span(),
+                "`ap_main` function cannot take `self` as an argument",
+            )
+            .into_compile_error()
+            .into();
+        }
+        FnArg::Typed(ty) => &ty.pat,
+    };
+
+    let ident = &item.sig.ident;
+    let attrs = item.attrs;
+    let unsafety = item.sig.unsafety;
+    let block = &item.block;
+
+    quote! {
+        #(#attrs)*
+        #[export_name = "_eonix_hal_ap_main"]
+        pub #unsafety fn #ident(
+            #arg_ident: eonix_mm::address::PRange,
+        ) -> ! #block
+    }
+    .into()
+}

+ 1 - 0
crates/eonix_hal/eonix_hal_traits/Cargo.toml

@@ -4,4 +4,5 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+eonix_mm = { path = "../../eonix_mm" }
 bitflags = "2.6.0"

+ 2 - 0
crates/eonix_hal/eonix_hal_traits/src/lib.rs

@@ -4,4 +4,6 @@
 pub mod context;
 pub mod fault;
 pub mod fpu;
+pub mod mm;
+pub mod processor;
 pub mod trap;

+ 6 - 0
crates/eonix_hal/eonix_hal_traits/src/mm.rs

@@ -0,0 +1,6 @@
+use eonix_mm::address::PRange;
+
+pub trait Memory {
+    fn present_ram() -> impl Iterator<Item = PRange>;
+    fn free_ram() -> impl Iterator<Item = PRange>;
+}
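
For orientation, this trait is how HAL-independent code discovers RAM. A minimal (hypothetical) consumer, mirroring the loop that `kernel_init` runs later in this commit, might look like the sketch below; `BasicPageAlloc` is the bump-style allocator added to `eonix_hal::mm` further down in this diff.

    use eonix_hal::mm::BasicPageAlloc;
    use eonix_hal_traits::mm::Memory;

    /// Hypothetical helper: seed an early page allocator with every free
    /// physical range the architecture reports.
    fn seed_allocator<M: Memory>(alloc: &mut BasicPageAlloc) {
        for range in M::free_ram() {
            alloc.add_range(range);
        }
    }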

+ 7 - 0
crates/eonix_hal/eonix_hal_traits/src/processor.rs

@@ -0,0 +1,7 @@
+use core::{ops::Deref, pin::Pin};
+
+pub trait Processor {
+    fn local() -> impl Deref<Target = Pin<&'static mut Self>>
+    where
+        Self: 'static;
+}

+ 5 - 0
crates/eonix_hal/eonix_hal_traits/src/trap.rs

@@ -38,6 +38,11 @@ pub trait TrapReturn {
     unsafe fn trap_return(&mut self);
 }
 
+pub trait IrqState {
+    /// Restore the IRQ state.
+    fn restore(self);
+}
+
 /// The reason that caused the trap.
 pub enum TrapType {
     Syscall { no: usize, args: [usize; 6] },

+ 12 - 22
crates/eonix_hal/src/arch/x86_64/bootstrap.rs

@@ -1,30 +1,26 @@
-use super::mm::{PA_G, PA_NXE, PA_P, PA_PS, PA_RW};
+pub(crate) mod init;
+
+use super::mm::{E820_MEM_MAP_DATA, PA_G, PA_NXE, PA_P, PA_PS, PA_RW};
 use core::arch::{global_asm, naked_asm};
 
-const KERNEL_IMAGE_PADDR: usize = 0x400000;
+const KERNEL_IMAGE_PADDR: usize = 0x200000;
 const KERNEL_PML4: usize = 0x1000;
 const KERNEL_PDPT_PHYS_MAPPING: usize = 0x2000;
 const KERNEL_PDPT_KERNEL_SPACE: usize = 0x3000;
 const KERNEL_PD_KIMAGE: usize = 0x4000;
 const KERNEL_PT_KIMAGE: usize = 0x5000;
-const KERNEL_PD_STRUCT_PAGE_ARR: usize = 0x6000;
 
 #[unsafe(link_section = ".low")]
-static EARLY_GDT: [u64; 7] = [0; 7];
+static mut EARLY_GDT: [u64; 7] = [0; 7];
 
 #[unsafe(no_mangle)]
 #[unsafe(link_section = ".low")]
-static EARLY_GDT_DESCRIPTOR: (u16, u32) = (0, 0);
-
-#[unsafe(link_section = ".low")]
-static BIOS_IDT_DESCRIPTOR: (u16, u32) = (0, 0);
+static mut EARLY_GDT_DESCRIPTOR: (u16, u32) = (0, 0);
 
 #[unsafe(link_section = ".low")]
-static E820_MEM_MAP_DATA: [u64; 128] = [0; 128];
+static mut BIOS_IDT_DESCRIPTOR: (u16, u32) = (0, 0);
 
 unsafe extern "C" {
-    fn _kernel_init() -> !;
-
     fn KIMAGE_32K_COUNT();
     fn KIMAGE_PAGES();
 
@@ -158,7 +154,7 @@ global_asm!(
     .align 16
     .Lread_data_packet:
         .long  0x00070010 # .stage1 takes up 3.5K, or 7 sectors
-        .long  0x00007000 # read to 0000:7000
+        .long  0x00006000 # read to 0000:6000
         .8byte 1          # read from LBA 1
     .popsection
     "#,
@@ -360,7 +356,7 @@ global_asm!(
 
         # clear paging structures
         mov $0x1000, %edi
-        mov $0x6000, %ecx
+        mov $0x5000, %ecx
         shr $2, %ecx # %ecx /= 4
         rep stosl
 
@@ -406,14 +402,9 @@ global_asm!(
         and $(~{PA_PS}), %ebx
         call fill_pxe
 
-        # PDPTE 0x008
-        mov ${KERNEL_PDPT_KERNEL_SPACE}, %edi
-        lea 0x8(%edi), %edi
-        mov ${KERNEL_PD_STRUCT_PAGE_ARR}, %esi
-        call fill_pxe
-
         # PDPTE 0xff8
-        lea 0xff0(%edi), %edi
+        mov ${KERNEL_PDPT_KERNEL_SPACE}, %edi
+        lea 0xff8(%edi), %edi
         mov ${KERNEL_PD_KIMAGE}, %esi
         call fill_pxe
 
@@ -490,7 +481,6 @@ global_asm!(
     PA_NXE = const PA_NXE,
     KERNEL_PDPT_PHYS_MAPPING = const KERNEL_PDPT_PHYS_MAPPING,
     KERNEL_PDPT_KERNEL_SPACE = const KERNEL_PDPT_KERNEL_SPACE,
-    KERNEL_PD_STRUCT_PAGE_ARR = const KERNEL_PD_STRUCT_PAGE_ARR,
     KERNEL_PD_KIMAGE = const KERNEL_PD_KIMAGE,
     KERNEL_PT_KIMAGE = const KERNEL_PT_KIMAGE,
     start_64bit = sym start_64bit,
@@ -520,7 +510,7 @@ pub unsafe extern "C" fn start_64bit() {
         kernel_identical_base = const 0xffffff0000000000u64,
         stack_paddr = const 0x80000,
         e820_data_addr = sym E820_MEM_MAP_DATA,
-        kernel_init = sym _kernel_init,
+        kernel_init = sym init::kernel_init,
         options(att_syntax)
     )
 }

+ 374 - 0
crates/eonix_hal/src/arch/x86_64/bootstrap/init.rs

@@ -0,0 +1,374 @@
+use crate::{
+    arch::{
+        bootstrap::{EARLY_GDT_DESCRIPTOR, KERNEL_PML4},
+        cpu::CPU,
+        mm::{ArchPhysAccess, GLOBAL_PAGE_TABLE, V_KERNEL_BSS_START},
+    },
+    bootstrap::BootStrapData,
+    mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator},
+};
+use acpi::{platform::ProcessorState, AcpiHandler, AcpiTables, PhysicalMapping, PlatformInfo};
+use arch::wrmsr;
+use core::{
+    alloc::Allocator,
+    arch::{asm, global_asm},
+    cell::RefCell,
+    hint::spin_loop,
+    sync::atomic::{AtomicBool, AtomicPtr, AtomicUsize, Ordering},
+};
+use eonix_hal_traits::mm::Memory;
+use eonix_mm::{
+    address::{Addr as _, PAddr, PRange, PhysAccess, VRange},
+    page_table::{PageAttribute, PagingMode, PTE as _},
+    paging::{Page, PageAccess, PageAlloc, PAGE_SIZE},
+};
+use eonix_percpu::PercpuArea;
+
+static BSP_PAGE_ALLOC: AtomicPtr<RefCell<BasicPageAlloc>> = AtomicPtr::new(core::ptr::null_mut());
+
+static AP_COUNT: AtomicUsize = AtomicUsize::new(0);
+static AP_STACK: AtomicUsize = AtomicUsize::new(0);
+static AP_SEM: AtomicBool = AtomicBool::new(false);
+
+global_asm!(
+    r#"
+    .pushsection .stage1.smp, "ax", @progbits
+    .code16
+    ljmp $0x0, $2f
+
+    2:
+    lgdt {early_gdt_descriptor}
+    mov $0xc0000080, %ecx
+    rdmsr
+    or $0x901, %eax # set LME, NXE, SCE
+    wrmsr
+
+    mov %cr4, %eax
+    or $0xa0, %eax # set PAE, PGE
+    mov %eax, %cr4
+
+    mov ${kernel_pml4}, %eax
+    mov %eax, %cr3
+
+    mov %cr0, %eax
+    or $0x80010001, %eax # set PE, WP, PG
+    mov %eax, %cr0
+
+    ljmp $0x08, $2f
+
+    .code64
+    2:
+    mov $0x10, %ax
+    mov %ax, %ds
+    mov %ax, %es
+    mov %ax, %ss
+
+    xor %rax, %rax
+    inc %rax
+    mov ${ap_semaphore}, %rcx
+
+    2:
+    xchg %rax, (%rcx) # AcqRel
+    cmp $0, %rax
+    je 2f
+    pause
+    jmp 2b
+
+    2:
+    mov ${ap_stack}, %rcx
+
+    2:
+    mov (%rcx), %rsp # Acquire
+    cmp $0, %rsp
+    jne 2f
+    pause
+    jmp 2b
+
+    2:
+    xor %rbp, %rbp
+    mov %rbp, (%rcx) # Relaxed
+
+    mov ${ap_semaphore}, %rcx
+    xchg %rax, (%rcx) # Release
+
+    mov %rsp, %rdi
+    push %rbp # NULL return address
+    mov ${ap_entry}, %rax
+    jmp *%rax
+
+    .popsection
+    "#,
+    early_gdt_descriptor = sym EARLY_GDT_DESCRIPTOR,
+    kernel_pml4 = const KERNEL_PML4,
+    ap_semaphore = sym AP_SEM,
+    ap_stack = sym AP_STACK,
+    ap_entry = sym ap_entry,
+    options(att_syntax),
+);
+
+fn enable_sse() {
+    unsafe {
+        asm!(
+            "mov %cr0, %rax",
+            "and $(~0xc), %rax",
+            "or $0x22, %rax",
+            "mov %rax, %cr0",
+            "mov %cr4, %rax",
+            "or $0x600, %rax",
+            "mov %rax, %cr4",
+            "fninit",
+            out("rax") _,
+            options(att_syntax, nomem, nostack)
+        )
+    }
+}
+
+fn setup_cpu(alloc: impl PageAlloc) {
+    let mut percpu_area = PercpuArea::new(|layout| {
+        // TODO: Use page size defined in `arch`.
+        let page_count = layout.size().div_ceil(PAGE_SIZE);
+        let page = Page::alloc_at_least_in(page_count, alloc);
+
+        let ptr = ArchPhysAccess::get_ptr_for_page(&page).cast();
+        page.into_raw();
+
+        ptr
+    });
+
+    percpu_area.setup(|pointer| {
+        wrmsr(0xC0000101, pointer.addr().get() as u64);
+
+        unsafe {
+            // SAFETY: %gs:0 points to the start of the percpu area.
+            asm!(
+                "movq {}, %gs:0",
+                in(reg) pointer.addr().get(),
+                options(nostack, preserves_flags, att_syntax)
+            );
+        }
+    });
+
+    let mut cpu = CPU::local();
+    unsafe {
+        // SAFETY: Preemption is disabled and interrupt MUST be disabled since
+        //         we are doing this in the kernel initialization phase.
+        cpu.as_mut().init();
+    }
+
+    percpu_area.register(cpu.cpuid());
+}
+
+fn setup_pic() {
+    // TODO: Remove this when we have completely switched to APIC.
+    pub struct Port8 {
+        no: u16,
+    }
+
+    impl Port8 {
+        pub const fn new(no: u16) -> Self {
+            Self { no }
+        }
+
+        pub fn write(&self, data: u8) {
+            arch::outb(self.no, data)
+        }
+    }
+
+    const PIC1_COMMAND: Port8 = Port8::new(0x20);
+    const PIC1_DATA: Port8 = Port8::new(0x21);
+    const PIC2_COMMAND: Port8 = Port8::new(0xA0);
+    const PIC2_DATA: Port8 = Port8::new(0xA1);
+
+    // Initialize PIC
+    PIC1_COMMAND.write(0x11); // edge trigger mode
+    PIC1_DATA.write(0x20); // IRQ 0-7 offset
+    PIC1_DATA.write(0x04); // cascade with slave PIC
+    PIC1_DATA.write(0x01); // no buffer mode
+
+    PIC2_COMMAND.write(0x11); // edge trigger mode
+    PIC2_DATA.write(0x28); // IRQ 8-15 offset
+    PIC2_DATA.write(0x02); // cascade with master PIC
+    PIC2_DATA.write(0x01); // no buffer mode
+
+    // Allow all IRQs
+    PIC1_DATA.write(0x0);
+    PIC2_DATA.write(0x0);
+}
+
+fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell<BasicPageAlloc>) {
+    #[derive(Clone)]
+    struct Handler;
+
+    impl AcpiHandler for Handler {
+        unsafe fn map_physical_region<T>(
+            &self,
+            physical_address: usize,
+            size: usize,
+        ) -> PhysicalMapping<Self, T> {
+            unsafe {
+                PhysicalMapping::new(
+                    physical_address,
+                    ArchPhysAccess::as_ptr(PAddr::from(physical_address)),
+                    size,
+                    size,
+                    self.clone(),
+                )
+            }
+        }
+
+        fn unmap_physical_region<T>(_: &PhysicalMapping<Self, T>) {}
+    }
+
+    let acpi_tables = unsafe {
+        // SAFETY: Probing for RSDP in BIOS memory should be fine.
+        AcpiTables::search_for_rsdp_bios(Handler).unwrap()
+    };
+
+    let platform_info = PlatformInfo::new_in(&acpi_tables, &alloc).unwrap();
+    let processor_info = platform_info.processor_info.unwrap();
+
+    let ap_count = processor_info
+        .application_processors
+        .iter()
+        .filter(|ap| !matches!(ap.state, ProcessorState::Disabled))
+        .count();
+
+    unsafe {
+        CPU::local().bootstrap_cpus();
+    }
+
+    for current_count in 0..ap_count {
+        let stack_range = {
+            let page_alloc = BasicPageAllocRef::new(&page_alloc);
+
+            let ap_stack = Page::alloc_order_in(3, page_alloc);
+            let stack_range = ap_stack.range();
+            ap_stack.into_raw();
+
+            stack_range
+        };
+
+        // SAFETY: All the APs can see the allocator work done before this point.
+        let old = BSP_PAGE_ALLOC.swap((&raw const *page_alloc) as *mut _, Ordering::Release);
+        assert!(
+            old.is_null(),
+            "BSP_PAGE_ALLOC should be null before we release it"
+        );
+
+        // SAFETY: The AP reading the stack will see the allocation work.
+        while let Err(_) = AP_STACK.compare_exchange_weak(
+            0,
+            stack_range.end().addr(),
+            Ordering::Release,
+            Ordering::Relaxed,
+        ) {
+            // Spin until we can set the stack pointer for the AP.
+            spin_loop();
+        }
+
+        spin_loop();
+
+        // SAFETY: Make sure if we read the AP count, the allocator MUST have been released.
+        while AP_COUNT.load(Ordering::Acquire) == current_count {
+            // Wait for the AP to finish its initialization.
+            spin_loop();
+        }
+
+        // SAFETY: We acquire the work done by the AP.
+        let old = BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::Acquire);
+        assert_eq!(
+            old as *const _, &raw const *page_alloc,
+            "We should read the previously saved allocator"
+        );
+    }
+}
+
+pub extern "C" fn kernel_init() -> ! {
+    let global_page_table = &GLOBAL_PAGE_TABLE;
+    let paging_levels = ArchPagingMode::LEVELS;
+
+    enable_sse();
+
+    let real_allocator = RefCell::new(BasicPageAlloc::new());
+    let alloc = BasicPageAllocRef::new(&real_allocator);
+
+    unsafe extern "C" {
+        fn BSS_LENGTH();
+    }
+
+    for range in ArchMemory::free_ram() {
+        real_allocator.borrow_mut().add_range(range);
+    }
+
+    // Map kernel BSS
+    for pte in global_page_table.iter_kernel_in(
+        VRange::from(V_KERNEL_BSS_START).grow(BSS_LENGTH as usize),
+        paging_levels,
+        &alloc,
+    ) {
+        let attr = PageAttribute::PRESENT
+            | PageAttribute::WRITE
+            | PageAttribute::READ
+            | PageAttribute::HUGE
+            | PageAttribute::GLOBAL;
+
+        let page = Page::alloc_in(&alloc);
+        pte.set(page.into_raw(), attr.into());
+    }
+
+    unsafe {
+        // SAFETY: We've just mapped the area with sufficient length.
+        core::ptr::write_bytes(V_KERNEL_BSS_START.addr() as *mut (), 0, BSS_LENGTH as usize);
+    }
+
+    setup_cpu(&alloc);
+    setup_pic();
+
+    ScopedAllocator::new(&mut [0; 1024])
+        .with_alloc(|mem_alloc| bootstrap_smp(mem_alloc, &real_allocator));
+
+    unsafe extern "Rust" {
+        fn _eonix_hal_main(_: BootStrapData) -> !;
+    }
+
+    let bootstrap_data = BootStrapData {
+        early_stack: PRange::new(PAddr::from(0x6000), PAddr::from(0x80000)),
+        allocator: Some(real_allocator),
+    };
+
+    unsafe {
+        _eonix_hal_main(bootstrap_data);
+    }
+}
+
+pub extern "C" fn ap_entry(stack_bottom: PAddr) -> ! {
+    let stack_range = PRange::new(stack_bottom - (1 << 3) * PAGE_SIZE, stack_bottom);
+
+    {
+        // SAFETY: Acquire all the work done by the BSP and other APs.
+        let alloc = loop {
+            let alloc = BSP_PAGE_ALLOC.swap(core::ptr::null_mut(), Ordering::AcqRel);
+
+            if !alloc.is_null() {
+                break alloc;
+            }
+        };
+
+        let ref_alloc = unsafe { &*alloc };
+        setup_cpu(BasicPageAllocRef::new(&ref_alloc));
+
+        // SAFETY: Release our allocation work.
+        BSP_PAGE_ALLOC.store(alloc, Ordering::Release);
+    }
+
+    // SAFETY: Make sure the allocator is set before we increment the AP count.
+    AP_COUNT.fetch_add(1, Ordering::Release);
+
+    unsafe extern "Rust" {
+        fn _eonix_hal_ap_main(stack_range: PRange) -> !;
+    }
+
+    unsafe {
+        _eonix_hal_ap_main(stack_range);
+    }
+}
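
The hand-off between `bootstrap_smp` (running on the BSP) and `ap_entry` (running on each AP) is a one-at-a-time publish/acknowledge protocol: the BSP publishes a fresh stack, the AP claims it, finishes its early setup, and bumps `AP_COUNT` so the BSP can move on to the next AP. The following host-side model is an illustration only, not part of this commit; it uses `std` threads and drops the `AP_SEM` serialization and the allocator hand-off, but shows the same ordering with plain atomics.

    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::{hint::spin_loop, thread};

    static AP_STACK: AtomicUsize = AtomicUsize::new(0);
    static AP_COUNT: AtomicUsize = AtomicUsize::new(0);

    fn main() {
        const APS: usize = 4;

        // "APs": each claims exactly one published stack, then reports back.
        let aps: Vec<_> = (0..APS)
            .map(|_| {
                thread::spawn(|| {
                    // Spin until the BSP publishes a stack, then take ownership
                    // by swapping the slot back to zero (mirrors the xchg in the stub).
                    let stack = loop {
                        let s = AP_STACK.swap(0, Ordering::Acquire);
                        if s != 0 {
                            break s;
                        }
                        spin_loop();
                    };
                    let _ = stack; // per-CPU setup would run on this stack
                    // Tell the BSP we are done with the shared early state.
                    AP_COUNT.fetch_add(1, Ordering::Release);
                })
            })
            .collect();

        // "BSP": publish one stack at a time, like the loop in `bootstrap_smp`.
        for started in 0..APS {
            let fake_stack = 0x8000_0000usize + started * 0x8000; // placeholder allocation
            AP_STACK.store(fake_stack, Ordering::Release);
            while AP_COUNT.load(Ordering::Acquire) == started {
                spin_loop(); // wait for that AP to finish its early setup
            }
        }

        for ap in aps {
            ap.join().unwrap();
        }
    }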

+ 115 - 37
arch/src/x86_64/init.rs → crates/eonix_hal/src/arch/x86_64/cpu.rs

@@ -1,5 +1,12 @@
-use super::{enable_sse, GDTEntry, InterruptControl, GDT};
-use core::{pin::Pin, ptr::addr_of};
+use super::gdt::{GDTEntry, GDT};
+use super::interrupt::InterruptControl;
+use core::marker::PhantomPinned;
+use core::pin::Pin;
+use eonix_preempt::PreemptGuard;
+use eonix_sync_base::LazyLock;
+
+#[eonix_percpu::define_percpu]
+static LOCAL_CPU: LazyLock<CPU> = LazyLock::new(CPU::new);
 
 #[repr(C)]
 #[derive(Debug, Clone, Copy)]
@@ -20,27 +27,18 @@ pub(crate) struct TSS {
     _reserved5: u32,
     _reserved6: u16,
     iomap_base: u16,
+    _pinned: PhantomPinned,
 }
 
-impl TSS {
-    pub fn new() -> Self {
-        Self {
-            _reserved1: 0,
-            rsp: [TSS_SP { low: 0, high: 0 }; 3],
-            _reserved2: 0,
-            _reserved3: 0,
-            ist: [TSS_SP { low: 0, high: 0 }; 7],
-            _reserved4: 0,
-            _reserved5: 0,
-            _reserved6: 0,
-            iomap_base: 0,
-        }
-    }
-
-    pub fn set_rsp0(&mut self, rsp: u64) {
-        self.rsp[0].low = rsp as u32;
-        self.rsp[0].high = (rsp >> 32) as u32;
-    }
+#[derive(Debug, Clone)]
+pub enum UserTLS {
+    /// TODO: This is not used yet.
+    #[allow(dead_code)]
+    TLS64(u64),
+    TLS32 {
+        base: u64,
+        desc: GDTEntry,
+    },
 }
 
 /// Architecture-specific cpu status data.
@@ -48,12 +46,28 @@ pub struct CPU {
     cpuid: usize,
     gdt: GDT,
     tss: TSS,
-    pub interrupt: InterruptControl,
+    interrupt: InterruptControl,
+}
+
+impl UserTLS {
+    /// # Return
+    /// Returns the TLS descriptor and the index of the TLS segment.
+    pub fn new32(base: u32, limit: u32, is_limit_in_pages: bool) -> (Self, u32) {
+        let flags = if is_limit_in_pages { 0xc } else { 0x4 };
+
+        (
+            Self::TLS32 {
+                base: base as u64,
+                desc: GDTEntry::new(base, limit, 0xf2, flags),
+            },
+            7,
+        )
+    }
 }
 
 impl CPU {
-    pub fn new(base: usize) -> Self {
-        let (interrupt_control, cpuid) = InterruptControl::new(base);
+    pub fn new() -> Self {
+        let (interrupt_control, cpuid) = InterruptControl::new();
 
         Self {
             cpuid,
@@ -67,19 +81,29 @@ impl CPU {
     ///
     /// # Safety
     /// Make sure preemption and interrupt are disabled before calling this function.
-    pub unsafe fn init(self: Pin<&mut Self>) {
-        enable_sse();
+    pub(crate) unsafe fn init(mut self: Pin<&mut Self>) {
+        let tss = &self.as_ref().get_ref().tss;
+        let tss_addr = tss as *const _ as u64;
+
+        let mut gdt = unsafe {
+            // SAFETY: We don't move the field out.
+            self.as_mut().map_unchecked_mut(|me| &mut me.gdt)
+        };
 
-        // SAFETY: We don't move the object.
-        let self_mut = self.get_unchecked_mut();
+        unsafe {
+            // SAFETY: We don't move `gdt` out.
+            gdt.as_mut().get_unchecked_mut().set_tss(tss_addr as u64);
+        }
+        gdt.load();
 
-        let tss_addr = addr_of!(self_mut.tss);
-        self_mut.gdt.set_tss(tss_addr as u64);
-        self_mut.gdt.load();
+        let mut interrupt = unsafe {
+            // SAFETY: We don't move the field out.
+            self.as_mut().map_unchecked_mut(|me| &mut me.interrupt)
+        };
 
         // SAFETY: `self` is pinned, so are its fields.
-        Pin::new_unchecked(&mut self_mut.interrupt).setup_idt();
-        self_mut.interrupt.setup_timer();
+        interrupt.as_mut().setup_idt();
+        interrupt.as_mut().setup_timer();
     }
 
     /// Bootstrap all CPUs.
@@ -88,17 +112,71 @@ impl CPU {
         self.interrupt.send_sipi();
     }
 
-    pub unsafe fn set_rsp0(&mut self, rsp: u64) {
-        self.tss.set_rsp0(rsp);
+    pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, rsp: u64) {
+        unsafe {
+            self.map_unchecked_mut(|me| &mut me.tss).set_rsp0(rsp);
+        }
     }
 
-    pub unsafe fn set_tls32(&mut self, desc: GDTEntry) {
-        self.gdt.set_tls32(desc);
+    pub fn set_tls32(self: Pin<&mut Self>, user_tls: &UserTLS) {
+        let UserTLS::TLS32 { desc, base } = user_tls else {
+            unimplemented!("TLS64 is not supported yet")
+        };
+
+        unsafe {
+            // SAFETY: We don't move the GDT object.
+            self.get_unchecked_mut().gdt.set_tls32(*desc);
+        }
+
+        const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
+        arch::wrmsr(IA32_KERNEL_GS_BASE, *base);
     }
 
     pub fn cpuid(&self) -> usize {
         self.cpuid
     }
+
+    pub fn end_of_interrupt(self: Pin<&mut Self>) {
+        unsafe {
+            // SAFETY: We don't move the `interrupt` field out.
+            self.map_unchecked_mut(|me| &mut me.interrupt)
+                .end_of_interrupt();
+        }
+    }
+
+    pub fn local() -> PreemptGuard<Pin<&'static mut Self>> {
+        unsafe {
+            // SAFETY: We pass the reference into a `PreemptGuard`, which ensures
+            //         that preemption is disabled.
+            PreemptGuard::new(Pin::new_unchecked(LOCAL_CPU.as_mut().get_mut()))
+        }
+    }
+}
+
+impl TSS {
+    pub fn new() -> Self {
+        Self {
+            _reserved1: 0,
+            rsp: [TSS_SP { low: 0, high: 0 }; 3],
+            _reserved2: 0,
+            _reserved3: 0,
+            ist: [TSS_SP { low: 0, high: 0 }; 7],
+            _reserved4: 0,
+            _reserved5: 0,
+            _reserved6: 0,
+            iomap_base: 0,
+            _pinned: PhantomPinned,
+        }
+    }
+
+    pub fn set_rsp0(self: Pin<&mut Self>, rsp: u64) {
+        unsafe {
+            // SAFETY: We don't move the TSS object.
+            let me = self.get_unchecked_mut();
+            me.rsp[0].low = rsp as u32;
+            me.rsp[0].high = (rsp >> 32) as u32;
+        }
+    }
 }
 
 #[macro_export]

+ 28 - 23
arch/src/x86_64/gdt.rs → crates/eonix_hal/src/arch/x86_64/gdt.rs

@@ -1,11 +1,11 @@
-use crate::TSS;
-use core::arch::asm;
+use super::cpu::TSS;
+use core::{arch::asm, marker::PhantomPinned};
 
 #[repr(transparent)]
 #[derive(Debug, Clone, Copy)]
 pub struct GDTEntry(u64);
 
-pub struct GDT([GDTEntry; GDT::LEN]);
+pub struct GDT([GDTEntry; GDT::LEN], PhantomPinned);
 
 impl GDTEntry {
     const NULL: Self = Self(0);
@@ -50,18 +50,21 @@ impl GDT {
     const TSS_INDEX: usize = 8;
 
     pub fn new() -> Self {
-        Self([
-            GDTEntry::NULL,
-            GDTEntry::KERNEL_CODE64,
-            GDTEntry::KERNEL_DATA64,
-            GDTEntry::USER_CODE64,
-            GDTEntry::USER_DATA64,
-            GDTEntry::USER_CODE32,
-            GDTEntry::USER_DATA32,
-            GDTEntry::NULL, // User TLS 32bit
-            GDTEntry::NULL, // TSS Descriptor Low
-            GDTEntry::NULL, // TSS Descriptor High
-        ])
+        Self(
+            [
+                GDTEntry::NULL,
+                GDTEntry::KERNEL_CODE64,
+                GDTEntry::KERNEL_DATA64,
+                GDTEntry::USER_CODE64,
+                GDTEntry::USER_DATA64,
+                GDTEntry::USER_CODE32,
+                GDTEntry::USER_DATA32,
+                GDTEntry::NULL, // User TLS 32bit
+                GDTEntry::NULL, // TSS Descriptor Low
+                GDTEntry::NULL, // TSS Descriptor High
+            ],
+            PhantomPinned,
+        )
     }
 
     pub fn set_tss(&mut self, base: u64) {
@@ -74,18 +77,20 @@ impl GDT {
         self.0[Self::TLS32_INDEX] = desc;
     }
 
-    pub unsafe fn load(&self) {
+    pub fn load(&self) {
         let len = Self::LEN * 8 - 1;
         let descriptor: [u64; 2] = [(len as u64) << 48, self.0.as_ptr() as u64];
         assert!(len < 0x10000, "GDT too large");
 
         let descriptor_address = &descriptor as *const _ as usize + 6;
-        asm!(
-            "lgdt ({})",
-            "ltr %ax",
-            in(reg) descriptor_address,
-            in("ax") Self::TSS_INDEX as u16 * 8,
-            options(att_syntax)
-        );
+        unsafe {
+            asm!(
+                "lgdt ({})",
+                "ltr %ax",
+                in(reg) descriptor_address,
+                in("ax") Self::TSS_INDEX as u16 * 8,
+                options(att_syntax, readonly, preserves_flags),
+            );
+        }
     }
 }

+ 18 - 54
arch/src/x86_64/interrupt.rs → crates/eonix_hal/src/arch/x86_64/interrupt.rs

@@ -1,6 +1,5 @@
-use super::pause;
-use crate::rdmsr;
-use core::{arch::asm, pin::Pin, ptr::NonNull};
+use arch::{pause, rdmsr};
+use core::{arch::asm, marker::PhantomPinned, pin::Pin, ptr::NonNull};
 
 #[repr(C)]
 #[derive(Clone, Copy)]
@@ -25,11 +24,9 @@ pub struct APICRegs {
 pub struct InterruptControl {
     idt: [IDTEntry; 256],
     apic_base: APICRegs,
+    _pinned: PhantomPinned,
 }
 
-/// State of the interrupt flag.
-pub struct IrqState(u64);
-
 impl IDTEntry {
     const fn new(offset: usize, selector: u16, attributes: u8) -> Self {
         Self {
@@ -111,10 +108,12 @@ impl APICRegs {
 impl InterruptControl {
     /// # Return
     /// Returns a tuple of InterruptControl and the cpu id of the current cpu.
-    pub(crate) fn new(base: usize) -> (Self, usize) {
+    pub fn new() -> (Self, usize) {
+        let trap_stubs_base = super::trap::trap_stubs_start as usize;
+
         let idt = core::array::from_fn(|idx| match idx {
-            0..0x80 => IDTEntry::new(base + 8 * idx, 0x08, 0x8e),
-            0x80 => IDTEntry::new(base + 8 * idx, 0x08, 0xee),
+            0..0x80 => IDTEntry::new(trap_stubs_base + 8 * idx, 0x08, 0x8e),
+            0x80 => IDTEntry::new(trap_stubs_base + 8 * idx, 0x08, 0xee),
             _ => IDTEntry::null(),
         });
 
@@ -134,7 +133,14 @@ impl InterruptControl {
 
         let cpuid = apic_base.local_apic_id().read() >> 24;
 
-        (Self { idt, apic_base }, cpuid as usize)
+        (
+            Self {
+                idt,
+                apic_base,
+                _pinned: PhantomPinned,
+            },
+            cpuid as usize,
+        )
     }
 
     pub fn setup_timer(&self) {
@@ -163,7 +169,7 @@ impl InterruptControl {
             pause();
         }
 
-        icr.write(0xc4607);
+        icr.write(0xc4606);
         while icr.read() & 0x1000 != 0 {
             pause();
         }
@@ -175,48 +181,6 @@ impl InterruptControl {
     }
 }
 
-impl IrqState {
-    pub fn restore(self) {
-        let Self(state) = self;
-
-        unsafe {
-            asm!(
-                "push {state}",
-                "popf",
-                state = in(reg) state,
-                options(att_syntax, nomem)
-            );
-        }
-    }
-}
-
-pub fn enable_irqs() {
-    unsafe {
-        asm!("sti", options(att_syntax, nomem, nostack));
-    }
-}
-
-pub fn disable_irqs() {
-    unsafe {
-        asm!("cli", options(att_syntax, nomem, nostack));
-    }
-}
-
-pub fn disable_irqs_save() -> IrqState {
-    let state: u64;
-    unsafe {
-        asm!(
-            "pushf",
-            "pop {state}",
-            "cli",
-            state = out(reg) state,
-            options(att_syntax, nomem)
-        );
-    }
-
-    IrqState(state)
-}
-
 fn lidt(base: usize, limit: u16) {
     let mut idt_descriptor = [0u16; 5];
 
@@ -227,6 +191,6 @@ fn lidt(base: usize, limit: u16) {
     idt_descriptor[4] = (base >> 48) as u16;
 
     unsafe {
-        asm!("lidt ({})", in(reg) &idt_descriptor, options(att_syntax));
+        asm!("lidt ({})", in(reg) &idt_descriptor, options(att_syntax, nostack, preserves_flags));
     }
 }

+ 5 - 3
crates/eonix_hal/src/arch/x86_64/link.x

@@ -19,7 +19,7 @@ SECTIONS {
         BYTE(0xaa);
     } > LOWMEM = 0x00
 
-    .stage1 0x7000 :
+    .stage1 0x6000 :
     {
         KEEP(*(.stage1.smp));
 
@@ -72,7 +72,7 @@ SECTIONS {
 INSERT AFTER .rodata;
 
 SECTIONS {
-    .percpu : ALIGN(16)
+    .percpu 0 : ALIGN(16)
     {
         __spercpu = .;
 
@@ -85,12 +85,14 @@ SECTIONS {
 
         . = ALIGN(16);
         __epercpu = .;
-    } > KPERCPU AT> REGION_RODATA
+    } > LOWMEM AT> REGION_RODATA
 
     PERCPU_DATA_START = LOADADDR(.percpu);
     PERCPU_LENGTH = ABSOLUTE(__epercpu - __spercpu);
 
     KIMAGE_PAGES = (__edata - _stext + 0x1000 - 1) / 0x1000;
     KIMAGE_32K_COUNT = (KIMAGE_PAGES + 8 - 1) / 8;
+
+    BSS_LENGTH = ABSOLUTE(__ebss - __sbss);
 }
 INSERT AFTER .rodata;

+ 3 - 4
crates/eonix_hal/src/arch/x86_64/memory.x

@@ -1,8 +1,7 @@
 MEMORY {
-    LOWMEM             : org = 0x0000000000000000, len = 1M
-    KBSS         (wa)  : org = 0xffffffffc0200000, len = 2M
-    KIMAGE       (rwx) : org = 0xffffffffffc00000, len = 2M
-    KPERCPU            : org = 0x0000000000000000, len = 128K
+    LOWMEM : org = 0x0000000000000000, len = 1M
+    KBSS   : org = 0xffffffffc0200000, len = 2M
+    KIMAGE : org = 0xffffffffffc00000, len = 2M
 }
 
 REGION_ALIAS("REGION_TEXT", KIMAGE);

+ 174 - 10
crates/eonix_hal/src/arch/x86_64/mm.rs

@@ -1,15 +1,14 @@
+use crate::traits::mm::Memory;
 use core::{marker::PhantomData, ptr::NonNull};
 use eonix_mm::{
-    address::{Addr as _, PAddr},
+    address::{Addr as _, AddrOps as _, PAddr, PRange, PhysAccess, VAddr},
     page_table::{
-        PageAttribute, PageTableLevel, PagingMode, RawAttribute, RawPageTable, TableAttribute, PTE,
+        PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable,
+        TableAttribute, PTE,
     },
-    paging::{PageBlock, PFN},
+    paging::{NoAlloc, Page, PageBlock, PAGE_SIZE, PFN},
 };
-
-pub const PAGE_SIZE: usize = 0x1000;
-
-pub const KERNEL_PML4_PFN: PFN = PFN::from_val(0x1000 >> 12);
+use eonix_sync_base::LazyLock;
 
 pub const PA_P: u64 = 0x001;
 pub const PA_RW: u64 = 0x002;
@@ -28,6 +27,18 @@ pub const PA_ANON: u64 = 0x800;
 pub const PA_NXE: u64 = 0x8000_0000_0000_0000;
 pub const PA_MASK: u64 = 0xfff0_0000_0000_0fff;
 
+pub const P_KIMAGE_START: PAddr = PAddr::from_val(0x200000);
+pub const V_KERNEL_BSS_START: VAddr = VAddr::from(0xffffffffc0200000);
+
+const KERNEL_PML4_PFN: PFN = PFN::from_val(0x1000 >> 12);
+
+pub static GLOBAL_PAGE_TABLE: LazyLock<PageTable<ArchPagingMode, NoAlloc, ArchPhysAccess>> =
+    LazyLock::new(|| unsafe {
+        Page::with_raw(KERNEL_PML4_PFN, |root_table_page| {
+            PageTable::with_root_table(root_table_page.clone())
+        })
+    });
+
 #[repr(transparent)]
 pub struct PTE64(u64);
 
@@ -38,6 +49,31 @@ pub struct RawPageTable4Levels<'a>(NonNull<PTE64>, PhantomData<&'a ()>);
 
 pub struct PagingMode4Levels;
 
+pub struct ArchPhysAccess;
+
+pub struct ArchMemory;
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+struct E820MemMapEntry {
+    base: u64,
+    len: u64,
+    entry_type: u32,
+    acpi_attrs: u32,
+}
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct BootLoaderData {
+    entry_count: u32,
+    entry_length: u32,
+
+    block_count_1k: u32,
+    block_count_64k: u32,
+
+    all_entries: [E820MemMapEntry; 42],
+}
+
 impl PTE for PTE64 {
     type Attr = PageAttribute64;
 
@@ -65,8 +101,6 @@ impl PagingMode for PagingMode4Levels {
         PageTableLevel::new(21, 9),
         PageTableLevel::new(12, 9),
     ];
-
-    const KERNEL_ROOT_TABLE_PFN: PFN = KERNEL_PML4_PFN;
 }
 
 impl<'a> RawPageTable<'a> for RawPageTable4Levels<'a> {
@@ -208,4 +242,134 @@ impl From<TableAttribute> for PageAttribute64 {
     }
 }
 
-pub type DefaultPagingMode = PagingMode4Levels;
+pub type ArchPagingMode = PagingMode4Levels;
+
+impl ArchPhysAccess {
+    const PHYS_OFFSET: usize = 0xffff_ff00_0000_0000;
+}
+
+impl PhysAccess for ArchPhysAccess {
+    unsafe fn as_ptr<T>(paddr: PAddr) -> NonNull<T> {
+        let alignment: usize = align_of::<T>();
+        assert!(paddr.addr() % alignment == 0, "Alignment error");
+
+        unsafe {
+            // SAFETY: We can assume that we'll never have `self.addr()` equals
+            //         to `-PHYS_OFFSET`. Otherwise, the kernel might be broken.
+            NonNull::new_unchecked((Self::PHYS_OFFSET + paddr.addr()) as *mut T)
+        }
+    }
+
+    unsafe fn from_ptr<T>(ptr: NonNull<T>) -> PAddr {
+        let addr = ptr.addr().get();
+
+        assert!(addr % align_of::<T>() == 0, "Alignment error");
+        assert!(
+            addr >= Self::PHYS_OFFSET,
+            "Address is not a valid physical address"
+        );
+
+        PAddr::from_val(addr - Self::PHYS_OFFSET)
+    }
+}
+
+impl E820MemMapEntry {
+    const ENTRY_FREE: u32 = 1;
+    // const ENTRY_USED: u32 = 2;
+
+    const fn zeroed() -> Self {
+        Self {
+            base: 0,
+            len: 0,
+            entry_type: 0,
+            acpi_attrs: 0,
+        }
+    }
+
+    fn is_free(&self) -> bool {
+        self.entry_type == Self::ENTRY_FREE
+    }
+
+    // fn is_used(&self) -> bool {
+    //     self.entry_type == Self::ENTRY_USED
+    // }
+
+    fn range(&self) -> PRange {
+        PRange::from(PAddr::from(self.base as usize)).grow(self.len as usize)
+    }
+}
+
+impl BootLoaderData {
+    const fn zeroed() -> Self {
+        Self {
+            entry_count: 0,
+            entry_length: 0,
+            block_count_1k: 0,
+            block_count_64k: 0,
+            all_entries: [E820MemMapEntry::zeroed(); 42],
+        }
+    }
+
+    // fn memory_size(&self) -> usize {
+    //     // The initial 1M is not counted in the E820 map. We add them to the total as well.
+    //     ((self.block_count_1k + 64 * self.block_count_64k) * 1024 + 1 * 1024 * 1024) as usize
+    // }
+
+    fn entries(&self) -> &[E820MemMapEntry] {
+        &self.all_entries[..self.entry_count as usize]
+    }
+
+    fn free_entries(&self) -> impl Iterator<Item = &E820MemMapEntry> {
+        self.entries().iter().filter(|entry| entry.is_free())
+    }
+}
+
+#[unsafe(link_section = ".low")]
+pub static mut E820_MEM_MAP_DATA: BootLoaderData = BootLoaderData::zeroed();
+
+impl Memory for ArchMemory {
+    fn present_ram() -> impl Iterator<Item = PRange> {
+        let e820 = &raw const E820_MEM_MAP_DATA;
+
+        unsafe {
+            // SAFETY: We don't write to the E820 memory map after the bootstrap.
+            e820.as_ref()
+                .unwrap_unchecked()
+                .free_entries()
+                .map(|entry| entry.range())
+        }
+    }
+
+    fn free_ram() -> impl Iterator<Item = PRange> {
+        unsafe extern "C" {
+            fn KIMAGE_PAGES();
+        }
+
+        let kimage_pages = KIMAGE_PAGES as usize;
+
+        let paddr_after_kimage = P_KIMAGE_START + kimage_pages * PAGE_SIZE;
+        let paddr_after_kimage_aligned = paddr_after_kimage.ceil_to(0x200000);
+        let paddr_unused_start = paddr_after_kimage_aligned;
+
+        core::iter::once(PRange::new(
+            PAddr::from_val(0x100000),
+            PAddr::from_val(0x200000),
+        ))
+        .chain(core::iter::once(PRange::new(
+            paddr_after_kimage,
+            paddr_after_kimage_aligned,
+        )))
+        .chain(
+            Self::present_ram()
+                .filter(move |range| range.end() > paddr_unused_start)
+                .map(move |range| {
+                    if range.start() < paddr_unused_start {
+                        let (_, right) = range.split_at(paddr_unused_start);
+                        right
+                    } else {
+                        range
+                    }
+                }),
+        )
+    }
+}
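
As a worked example of `free_ram` above: assuming, purely for illustration, that `KIMAGE_PAGES` resolves to 300, the kernel image occupies [0x200000, 0x32c000) (300 pages of 0x1000 bytes starting at `P_KIMAGE_START`), `paddr_after_kimage_aligned` rounds up to the next 2 MiB boundary at 0x400000, and the iterator yields [0x100000, 0x200000), then [0x32c000, 0x400000), and finally every free E820 range that ends above 0x400000, clipped so that it starts no lower than 0x400000.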

+ 3 - 0
crates/eonix_hal/src/arch/x86_64/mod.rs

@@ -1,4 +1,7 @@
 pub mod bootstrap;
 pub mod context;
+pub mod cpu;
+pub mod gdt;
+pub mod interrupt;
 pub mod mm;
 pub mod trap;

+ 56 - 10
crates/eonix_hal/src/arch/x86_64/trap.rs

@@ -1,14 +1,17 @@
 mod trap_context;
 
 use super::context::TaskContext;
-use core::arch::{global_asm, naked_asm};
-use eonix_hal_traits::{context::RawTaskContext, trap::TrapReturn};
+use core::arch::{asm, global_asm, naked_asm};
+use eonix_hal_traits::{
+    context::RawTaskContext,
+    trap::{IrqState as IrqStateTrait, TrapReturn},
+};
 
 pub use trap_context::TrapContext;
 
 unsafe extern "C" {
     fn _default_trap_handler(trap_context: &mut TrapContext);
-    pub static TRAP_STUBS_START: usize;
+    pub fn trap_stubs_start();
     fn _raw_trap_return();
 }
 
@@ -21,6 +24,9 @@ static CAPTURER_CONTEXT: TaskContext = TaskContext::new();
 /// This value will never be used.
 static mut DIRTY_TRAP_CONTEXT: TaskContext = TaskContext::new();
 
+/// State of the interrupt flag.
+pub struct IrqState(u64);
+
 global_asm!(
     r"
     .set RAX, 0x00
@@ -64,11 +70,8 @@ global_asm!(
         .cfi_same_value %rbp
     .endm
 
-    .globl TRAP_STUBS_START
-    TRAP_STUBS_START:
-        .quad _trap_stubs_start
-
-    _trap_stubs_start:
+    .globl {trap_stubs_start}
+    {trap_stubs_start}:
         .altmacro
         .macro build_isr_no_err name
             .align 8
@@ -190,7 +193,7 @@ global_asm!(
         .cfi_rel_offset %rbp, RBP
         
         mov INT_NO(%rsp), %rcx
-        sub $_trap_stubs_start, %rcx
+        sub ${trap_stubs_start}, %rcx
         shr $3, %rcx
         mov %rcx, INT_NO(%rsp)
         
@@ -269,6 +272,7 @@ global_asm!(
         iretq
         .cfi_endproc
     ",
+    trap_stubs_start = sym trap_stubs_start,
     handler = sym _percpu_inner_TRAP_HANDLER,
     options(att_syntax),
 );
@@ -326,7 +330,7 @@ unsafe extern "C" fn captured_trap_return(trap_context: usize) -> ! {
 
 impl TrapReturn for TrapContext {
     unsafe fn trap_return(&mut self) {
-        let irq_states = arch::disable_irqs_save();
+        let irq_states = disable_irqs_save();
         let old_handler = TRAP_HANDLER.swap(captured_trap_handler);
 
         let mut to_ctx = TaskContext::new();
@@ -342,3 +346,45 @@ impl TrapReturn for TrapContext {
         irq_states.restore();
     }
 }
+
+impl IrqStateTrait for IrqState {
+    fn restore(self) {
+        let Self(state) = self;
+
+        unsafe {
+            asm!(
+                "push {state}",
+                "popf",
+                state = in(reg) state,
+                options(att_syntax, nomem)
+            );
+        }
+    }
+}
+
+pub fn enable_irqs() {
+    unsafe {
+        asm!("sti", options(att_syntax, nomem, nostack));
+    }
+}
+
+pub fn disable_irqs() {
+    unsafe {
+        asm!("cli", options(att_syntax, nomem, nostack));
+    }
+}
+
+pub fn disable_irqs_save() -> IrqState {
+    let state: u64;
+    unsafe {
+        asm!(
+            "pushf",
+            "pop {state}",
+            "cli",
+            state = out(reg) state,
+            options(att_syntax, nomem)
+        );
+    }
+
+    IrqState(state)
+}
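
These free functions plus the `IrqState` returned by `disable_irqs_save` give the usual save-and-restore idiom. A minimal (hypothetical) wrapper built on top of them, assuming the `IrqState` trait from `eonix_hal_traits::trap` is in scope for `restore`:

    /// Hypothetical helper, not part of this commit: run `f` with interrupts
    /// masked on the current CPU and restore the previous IF state afterwards.
    pub fn with_irqs_disabled<T>(f: impl FnOnce() -> T) -> T {
        let saved = disable_irqs_save(); // pushf + cli, keeps the old RFLAGS
        let result = f();
        saved.restore(); // popf, re-enables IRQs only if they were on before
        result
    }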

+ 22 - 0
crates/eonix_hal/src/bootstrap.rs

@@ -0,0 +1,22 @@
+use crate::mm::{BasicPageAlloc, BasicPageAllocRef};
+use core::cell::RefCell;
+use eonix_mm::address::PRange;
+
+pub struct BootStrapData {
+    pub(crate) early_stack: PRange,
+    pub(crate) allocator: Option<RefCell<BasicPageAlloc>>,
+}
+
+impl BootStrapData {
+    pub fn get_alloc(&self) -> Option<BasicPageAllocRef<'_>> {
+        self.allocator.as_ref().map(BasicPageAllocRef::new)
+    }
+
+    pub fn take_alloc(&mut self) -> Option<BasicPageAlloc> {
+        self.allocator.take().map(RefCell::into_inner)
+    }
+
+    pub fn get_early_stack(&self) -> PRange {
+        self.early_stack
+    }
+}
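
Combined with the `#[eonix_hal::main]` attribute introduced earlier in this commit, a kernel entry point consuming this structure could look like the following sketch (hypothetical kernel-side code, not part of the diff):

    #[eonix_hal::main]
    fn kernel_main(mut data: eonix_hal::bootstrap::BootStrapData) -> ! {
        // The bootstrap code hands over the early boot stack range and the
        // page allocator it used, exactly once.
        let early_stack = data.get_early_stack();
        let boot_alloc = data.take_alloc().expect("allocator handed over once");

        // ... transfer `boot_alloc`'s remaining ranges to the real allocator,
        // reclaim `early_stack`, bring up the scheduler, and never return ...
        let _ = (early_stack, boot_alloc);
        loop {}
    }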

+ 3 - 1
crates/eonix_hal/src/lib.rs

@@ -1,12 +1,14 @@
 #![no_std]
+#![feature(allocator_api)]
 #![feature(doc_notable_trait)]
 
 pub(crate) mod arch;
 
+pub mod bootstrap;
 pub mod context;
 pub mod mm;
 pub mod processor;
 pub mod trap;
 
-pub use eonix_hal_macros::default_trap_handler;
+pub use eonix_hal_macros::{ap_main, default_trap_handler, main};
 pub use eonix_hal_traits as traits;

+ 198 - 1
crates/eonix_hal/src/mm.rs

@@ -1 +1,198 @@
-pub use crate::arch::mm::{DefaultPagingMode, PAGE_SIZE};
+use core::{
+    alloc::{AllocError, Allocator, Layout},
+    cell::RefCell,
+    ptr::NonNull,
+};
+use eonix_mm::{
+    address::{AddrOps as _, PRange},
+    paging::{PageAlloc, UnmanagedRawPage, PAGE_SIZE, PFN},
+};
+
+pub use crate::arch::mm::{ArchMemory, ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE};
+
+pub struct BasicPageAlloc {
+    ranges: [Option<PRange>; Self::MAX],
+    head: usize,
+    tail: usize,
+}
+
+struct ScopedAllocInner<'a> {
+    _memory: &'a mut [u8],
+    current: NonNull<[u8]>,
+    allocated_count: usize,
+}
+
+pub struct ScopedAllocator<'a> {
+    inner: RefCell<ScopedAllocInner<'a>>,
+}
+
+impl BasicPageAlloc {
+    const MAX: usize = 32;
+
+    pub const fn new() -> Self {
+        Self {
+            ranges: [None; Self::MAX],
+            head: 0,
+            tail: 0,
+        }
+    }
+
+    fn alloc_one(&mut self) -> PFN {
+        assert_ne!(self.head, self.tail, "No free pages available");
+        let mut range = self.ranges[self.head].take().unwrap();
+        range = range.shrink(PAGE_SIZE);
+
+        let pfn = PFN::from(range.end());
+
+        if range.len() != 0 {
+            self.ranges[self.head] = Some(range);
+        } else {
+            self.head += 1;
+            self.head %= Self::MAX;
+        }
+
+        pfn
+    }
+
+    fn alloc_order(&mut self, order: u32) -> PFN {
+        assert!(order <= 4);
+        let me = core::mem::replace(self, Self::new());
+
+        let mut found = None;
+        for mut range in me.into_iter() {
+            if found.is_some() || range.len() < (PAGE_SIZE << order) {
+                self.add_range(range);
+                continue;
+            }
+
+            range = range.shrink(PAGE_SIZE << order);
+            found = Some(PFN::from(range.end()));
+
+            if range.len() != 0 {
+                self.add_range(range);
+            }
+        }
+
+        found.expect("No free pages available for the requested order")
+    }
+
+    pub fn add_range(&mut self, range: PRange) {
+        let tail = self.tail;
+
+        self.tail += 1;
+        self.tail %= Self::MAX;
+
+        if self.tail == self.head {
+            panic!("Page allocator is full");
+        }
+
+        self.ranges[tail] = Some(PRange::new(range.start().ceil(), range.end().floor()));
+    }
+
+    pub fn alloc(&mut self, order: u32) -> PFN {
+        match order {
+            0 => self.alloc_one(),
+            ..=4 => self.alloc_order(order),
+            _ => panic!("Order {} is too large for BasicPageAlloc", order),
+        }
+    }
+
+    pub fn into_iter(self) -> impl Iterator<Item = PRange> {
+        self.ranges
+            .into_iter()
+            .cycle()
+            .skip(self.head)
+            .map_while(|x| x)
+    }
+}
+
+#[derive(Clone)]
+pub struct BasicPageAllocRef<'a>(&'a RefCell<BasicPageAlloc>);
+
+impl<'a> BasicPageAllocRef<'a> {
+    pub const fn new(alloc: &'a RefCell<BasicPageAlloc>) -> Self {
+        Self(alloc)
+    }
+}
+
+impl PageAlloc for BasicPageAllocRef<'_> {
+    type RawPage = UnmanagedRawPage;
+
+    fn alloc_order(&self, order: u32) -> Option<Self::RawPage> {
+        Some(Self::RawPage::new(self.0.borrow_mut().alloc(order), order))
+    }
+
+    unsafe fn dealloc(&self, _: Self::RawPage) {
+        panic!("Dealloc is not supported in BasicPageAlloc");
+    }
+
+    fn has_management_over(&self, _: Self::RawPage) -> bool {
+        true
+    }
+}
+
+impl<'a> ScopedAllocator<'a> {
+    pub fn new(memory: &'a mut [u8]) -> Self {
+        ScopedAllocator {
+            inner: RefCell::new(ScopedAllocInner {
+                current: NonNull::new(memory).unwrap(),
+                _memory: memory,
+                allocated_count: 0,
+            }),
+        }
+    }
+
+    pub fn with_alloc<'b, 'r, O>(&'r self, func: impl FnOnce(&'b ScopedAllocator<'a>) -> O) -> O
+    where
+        'a: 'b,
+        'r: 'b,
+    {
+        func(self)
+    }
+}
+
+unsafe impl Allocator for &ScopedAllocator<'_> {
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        let mut inner = self.inner.borrow_mut();
+        let memory = &mut inner.current;
+
+        let addr: NonNull<u8> = memory.cast();
+        let offset = addr.align_offset(layout.align());
+
+        if offset + layout.size() > memory.len() {
+            return Err(AllocError);
+        }
+
+        let allocated = unsafe {
+            // SAFETY: `addr + offset` won't overflow.
+            NonNull::slice_from_raw_parts(addr.add(offset), layout.size())
+        };
+
+        unsafe {
+            // SAFETY: `allocated + layout.size()` won't overflow.
+            *memory = NonNull::slice_from_raw_parts(
+                allocated.cast::<u8>().add(layout.size()),
+                memory.len() - offset - layout.size(),
+            );
+        }
+
+        inner.allocated_count += 1;
+        Ok(allocated)
+    }
+
+    unsafe fn deallocate(&self, _: NonNull<u8>, _: Layout) {
+        self.inner.borrow_mut().allocated_count -= 1;
+    }
+}
+
+impl Drop for ScopedAllocator<'_> {
+    fn drop(&mut self) {
+        let inner = self.inner.borrow();
+        if inner.allocated_count > 0 {
+            panic!(
+                "Memory leak detected: {} allocations not deallocated",
+                inner.allocated_count
+            );
+        }
+    }
+}
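
Note: `BasicPageAlloc` is a bounded ring of at most 32 physical ranges that bump-allocates pages from the end of each range, while `ScopedAllocator` is a leak-checked bump allocator over a borrowed buffer. A usage sketch for the latter, assuming nightly with `allocator_api` enabled (as this crate now does):

```rust
// Nightly-only sketch; the crate enables `#![feature(allocator_api)]`.
use eonix_hal::mm::ScopedAllocator;

fn scoped_alloc_demo(buf: &mut [u8]) {
    let alloc = ScopedAllocator::new(buf);

    // `Allocator` is implemented for `&ScopedAllocator`, so allocate
    // through a shared reference.
    let value = Box::new_in(1234u32, &alloc);
    assert_eq!(*value, 1234);

    // Every allocation must be released before `alloc` goes out of scope,
    // otherwise the leak check in `Drop` panics.
    drop(value);
}
```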

+ 1 - 1
crates/eonix_hal/src/processor.rs

@@ -1 +1 @@
-
+pub use crate::arch::cpu::{UserTLS, CPU};

+ 1 - 4
crates/eonix_hal/src/trap.rs

@@ -1,8 +1,5 @@
 use eonix_hal_traits::trap::IsRawTrapContext;
 
-pub use crate::arch::trap::TrapContext;
-
-// TODO: Remove this once the arch module is fully implemented.
-pub use crate::arch::trap::TRAP_STUBS_START;
+pub use crate::arch::trap::{disable_irqs, disable_irqs_save, enable_irqs, IrqState, TrapContext};
 
 struct _CheckTrapContext(IsRawTrapContext<TrapContext>);

+ 1 - 1
crates/eonix_mm/src/address.rs

@@ -7,7 +7,7 @@ mod vaddr;
 pub use addr::{Addr, AddrOps};
 pub use addr_range::AddrRange;
 pub use error::AddressError;
-pub use paddr::PAddr;
+pub use paddr::{PAddr, PhysAccess};
 pub use vaddr::VAddr;
 
 pub type PRange = AddrRange<PAddr>;

+ 24 - 0
crates/eonix_mm/src/address/paddr.rs

@@ -3,8 +3,32 @@ use crate::paging::{PAGE_SIZE_BITS, PFN};
 use core::{
     fmt,
     ops::{Add, Sub},
+    ptr::NonNull,
 };
 
+pub trait PhysAccess {
+    /// Translate the data that this address points to into a
+    /// kernel-accessible pointer. Use it with care.
+    ///
+    /// # Panics
+    /// If the address is not properly aligned.
+    ///
+    /// # Safety
+    /// The caller must ensure that the data is of type `T`.
+    /// Otherwise, it may lead to undefined behavior.
+    unsafe fn as_ptr<T>(paddr: PAddr) -> NonNull<T>;
+
+    /// Translate a kernel-accessible pointer back into a physical address.
+    ///
+    /// # Panics
+    /// If the pointer is not properly aligned.
+    ///
+    /// # Safety
+    /// The caller must ensure that the pointer is valid and points to a
+    /// valid physical memory location.
+    unsafe fn from_ptr<T>(ptr: NonNull<T>) -> PAddr;
+}
+
 #[repr(transparent)]
 #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
 pub struct PAddr(usize);
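
Note: a typical implementor of `PhysAccess` is a direct-map translation, as previously hard-coded in `src/kernel/mem/access.rs` (removed later in this commit). A sketch, with `PHYS_OFFSET` as an assumed direct-map base:

```rust
use core::ptr::NonNull;
use eonix_mm::address::{Addr as _, PAddr, PhysAccess};

/// Assumed direct-map base address; the real value is arch-specific.
const PHYS_OFFSET: usize = 0xffff_ff00_0000_0000;

struct DirectMapAccess;

impl PhysAccess for DirectMapAccess {
    unsafe fn as_ptr<T>(paddr: PAddr) -> NonNull<T> {
        assert_eq!(paddr.addr() % align_of::<T>(), 0, "Alignment error");
        unsafe {
            // SAFETY: the direct map covers all of physical memory and
            // `PHYS_OFFSET + paddr` is never zero for valid RAM.
            NonNull::new_unchecked((PHYS_OFFSET + paddr.addr()) as *mut T)
        }
    }

    unsafe fn from_ptr<T>(ptr: NonNull<T>) -> PAddr {
        PAddr::from(ptr.as_ptr() as usize - PHYS_OFFSET)
    }
}
```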

+ 46 - 18
crates/eonix_mm/src/page_table/page_table.rs

@@ -49,10 +49,20 @@ where
         }
     }
 
-    pub fn new_in<A1: PageAlloc>(kernel_root_table_page: &Page<A1>, alloc: A) -> Self {
+    pub fn clone_global<'b, B>(&self) -> PageTable<'b, M, B, X>
+    where
+        B: GlobalPageAlloc,
+    {
+        self.clone_in(B::global())
+    }
+
+    pub fn clone_in<'b, B>(&self, alloc: B) -> PageTable<'b, M, B, X>
+    where
+        B: PageAlloc,
+    {
         let new_root_table_page = Page::alloc_in(alloc);
         let new_table_data = X::get_ptr_for_page(&new_root_table_page);
-        let kernel_table_data = X::get_ptr_for_page(kernel_root_table_page);
+        let kernel_table_data = X::get_ptr_for_page(&self.root_table_page);
 
         unsafe {
             // SAFETY: `new_table_data` and `kernel_table_data` are both valid pointers
@@ -72,7 +82,7 @@ where
             root_page_table.index_mut(idx).take();
         }
 
-        Self::with_root_table(new_root_table_page)
+        PageTable::with_root_table(new_root_table_page)
     }
 
     pub fn addr(&self) -> PAddr {
@@ -119,17 +129,47 @@ where
         range: VRange,
         levels: &'static [PageTableLevel],
     ) -> impl Iterator<Item = &mut M::Entry> {
-        let alloc = self.root_table_page.allocator();
+        self.iter_kernel_in(range, levels, self.root_table_page.allocator())
+    }
+
+    /// Iterates over the kernel space entries in the page table for the specified levels
+    /// with a given page allocator.
+    ///
+    /// # Parameters
+    /// - `range`: The virtual address range to iterate over.
+    /// - `levels`: A slice of `PageTableLevel` that specifies which levels of the page table
+    ///   should be included in the iteration. Each level corresponds to a level in the page
+    ///   table hierarchy, and the iterator will traverse entries at these levels.
+    /// - `alloc`: A page allocator that provides memory for the page table entries.
+    ///
+    /// # Returns
+    /// An iterator over mutable references to the page table entries (`M::Entry`) within the
+    /// specified range and levels.
+    ///
+    /// # Example
+    /// ```no_run
+    /// let range = VRange::new(0x1234000, 0x1300000);
+    /// let levels = &M::LEVELS[..2];
+    /// for pte in page_table.iter_kernel_in(range, levels, NoAlloc) {
+    ///     // Process each entry
+    /// }
+    /// ```
+    pub fn iter_kernel_in<A1: PageAlloc>(
+        &self,
+        range: VRange,
+        levels: &'static [PageTableLevel],
+        alloc: A1,
+    ) -> impl Iterator<Item = &mut M::Entry> {
         let page_table_ptr = X::get_ptr_for_page(&self.root_table_page);
         let root_page_table = unsafe {
             // SAFETY: `page_table_ptr` is a valid pointer to a page table.
             M::RawTable::from_ptr(page_table_ptr)
         };
 
-        PageTableIterator::<M, A, X, KernelIterator>::with_levels(
+        PageTableIterator::<M, A1, X, KernelIterator>::with_levels(
             root_page_table,
             range,
-            alloc.clone(),
+            alloc,
             levels,
         )
     }
@@ -165,18 +205,6 @@ where
     }
 }
 
-impl<'a, M, A, X> PageTable<'a, M, A, X>
-where
-    M: PagingMode,
-    M::Entry: 'a,
-    A: GlobalPageAlloc,
-    X: PageAccess,
-{
-    pub fn new<A1: PageAlloc>(kernel_root_table_page: &Page<A1>) -> Self {
-        Self::new_in(kernel_root_table_page, A::global())
-    }
-}
-
 impl<'a, M, A, X> Drop for PageTable<'a, M, A, X>
 where
     M: PagingMode,
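
Note: `clone_global`/`clone_in` replace the removed `new`/`new_in` constructors; a new address space now starts as a copy of the kernel entries of an existing table, typically `GLOBAL_PAGE_TABLE`. A sketch mirroring the later `MMList` changes (the `GlobalPageAlloc` path is the kernel's own allocator and is an assumption of this sketch):

```rust
use crate::kernel::mem::GlobalPageAlloc; // the kernel's allocator (assumed path)
use eonix_hal::mm::{ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE};
use eonix_mm::page_table::PageTable;

type KernelPageTable<'a> = PageTable<'a, ArchPagingMode, GlobalPageAlloc, ArchPhysAccess>;

fn new_user_page_table<'a>() -> KernelPageTable<'a> {
    // Copies the kernel-space entries of the global root table into a newly
    // allocated root table; user-space entries start out empty.
    GLOBAL_PAGE_TABLE.clone_global()
}
```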

+ 1 - 5
crates/eonix_mm/src/page_table/paging_mode.rs

@@ -1,15 +1,11 @@
 use super::{RawPageTable, PTE};
-use crate::{
-    address::{Addr as _, VAddr},
-    paging::PFN,
-};
+use crate::address::{Addr as _, VAddr};
 
 pub trait PagingMode {
     type Entry: PTE;
     type RawTable<'a>: RawPageTable<'a, Entry = Self::Entry>;
 
     const LEVELS: &'static [PageTableLevel];
-    const KERNEL_ROOT_TABLE_PFN: PFN;
 }
 
 #[derive(Clone, Copy, PartialOrd, PartialEq)]

+ 1 - 1
crates/eonix_mm/src/paging.rs

@@ -6,4 +6,4 @@ mod raw_page;
 pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS};
 pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc};
 pub use pfn::PFN;
-pub use raw_page::RawPage;
+pub use raw_page::{RawPage, UnmanagedRawPage};

+ 14 - 1
crates/eonix_mm/src/paging/page.rs

@@ -1,5 +1,5 @@
 use super::{GlobalPageAlloc, PageAlloc, RawPage as _, PFN};
-use crate::address::{AddrRange, PAddr};
+use crate::address::{AddrRange, PAddr, PhysAccess};
 use core::{fmt, mem::ManuallyDrop, ptr::NonNull, sync::atomic::Ordering};
 
 pub const PAGE_SIZE: usize = 4096;
@@ -305,3 +305,16 @@ impl<A: PageAlloc> fmt::Debug for Page<A> {
         )
     }
 }
+
+impl<T> PageAccess for T
+where
+    T: PhysAccess,
+{
+    unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull<PageBlock> {
+        unsafe {
+            // SAFETY: The physical address of an existing page must be
+            //         aligned to the page size.
+            T::as_ptr(PAddr::from(pfn))
+        }
+    }
+}

+ 6 - 6
crates/eonix_mm/src/paging/raw_page.rs

@@ -12,34 +12,34 @@ pub trait RawPage: Clone + Copy + From<PFN> + Into<PFN> {
 }
 
 #[derive(Clone, Copy)]
-pub struct UnmanagedRawPage(PFN);
+pub struct UnmanagedRawPage(PFN, u32);
 
 /// Unmanaged raw pages should always have a non-zero refcount to
 /// keep `free()` from being called.
 static UNMANAGED_RAW_PAGE_CLONE_COUNT: AtomicUsize = AtomicUsize::new(1);
 
 impl UnmanagedRawPage {
-    pub const fn new(pfn: PFN) -> Self {
-        Self(pfn)
+    pub const fn new(pfn: PFN, order: u32) -> Self {
+        Self(pfn, order)
     }
 }
 
 impl From<PFN> for UnmanagedRawPage {
     fn from(value: PFN) -> Self {
-        Self::new(value)
+        Self::new(value, 0)
     }
 }
 
 impl Into<PFN> for UnmanagedRawPage {
     fn into(self) -> PFN {
-        let Self(pfn) = self;
+        let Self(pfn, _) = self;
         pfn
     }
 }
 
 impl RawPage for UnmanagedRawPage {
     fn order(&self) -> u32 {
-        0
+        self.1
     }
 
     fn refcount(&self) -> &AtomicUsize {

+ 1 - 1
crates/eonix_percpu/eonix_percpu_macros/src/lib.rs

@@ -157,7 +157,7 @@ fn define_percpu_shared_impl(
 
             pub fn get_for_cpu(&self, cpuid: usize) -> Option<& #ty > {
                 let offset = & #inner_ident as *const _ as usize;
-                let base = ::arch::PercpuArea::get_for(cpuid);
+                let base = ::eonix_percpu::PercpuArea::get_for(cpuid);
                 base.map(|base| unsafe { base.byte_add(offset).cast().as_ref() })
             }
         }

+ 72 - 0
crates/eonix_percpu/src/lib.rs

@@ -1,7 +1,79 @@
 #![no_std]
 
+use core::alloc::Layout;
+use core::ptr::null_mut;
+use core::ptr::NonNull;
+use core::sync::atomic::AtomicPtr;
+use core::sync::atomic::Ordering;
+
 #[cfg(target_arch = "x86_64")]
 pub use eonix_percpu_macros::define_percpu_x86_64 as define_percpu;
 
 #[cfg(target_arch = "x86_64")]
 pub use eonix_percpu_macros::define_percpu_shared_x86_64 as define_percpu_shared;
+
+const MAX_CPUS: usize = 256;
+
+#[repr(align(16))]
+pub struct PercpuData();
+
+pub struct PercpuArea {
+    data: NonNull<PercpuData>,
+}
+
+static PERCPU_POINTERS: [AtomicPtr<PercpuData>; MAX_CPUS] =
+    [const { AtomicPtr::new(null_mut()) }; MAX_CPUS];
+
+impl PercpuArea {
+    fn len() -> usize {
+        unsafe extern "C" {
+            fn PERCPU_LENGTH();
+        }
+        let len = PERCPU_LENGTH as usize;
+
+        assert_ne!(len, 0, "Percpu length should not be zero.");
+        len
+    }
+
+    fn data_start() -> NonNull<u8> {
+        unsafe extern "C" {
+            fn PERCPU_DATA_START();
+        }
+
+        let addr = PERCPU_DATA_START as usize;
+        NonNull::new(addr as *mut _).expect("Percpu data should not be null.")
+    }
+
+    fn layout() -> Layout {
+        Layout::from_size_align(Self::len(), align_of::<PercpuData>()).expect("Invalid layout.")
+    }
+
+    pub fn new<F>(allocate: F) -> Self
+    where
+        F: FnOnce(Layout) -> NonNull<u8>,
+    {
+        let data_pointer = allocate(Self::layout());
+
+        unsafe {
+            // SAFETY: The `data_pointer` is of valid length and properly aligned.
+            data_pointer.copy_from_nonoverlapping(Self::data_start(), Self::len());
+        }
+
+        Self {
+            data: data_pointer.cast(),
+        }
+    }
+
+    pub fn register(self, cpuid: usize) {
+        PERCPU_POINTERS[cpuid].store(self.data.as_ptr(), Ordering::Release);
+    }
+
+    pub fn get_for(cpuid: usize) -> Option<NonNull<()>> {
+        let pointer = PERCPU_POINTERS[cpuid].load(Ordering::Acquire);
+        NonNull::new(pointer.cast())
+    }
+
+    pub fn setup(&mut self, func: impl FnOnce(NonNull<PercpuData>)) {
+        func(self.data)
+    }
+}
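
Note: the bring-up sequence for this API mirrors the removed `src/kernel/cpu.rs`. A sketch with a caller-provided, page-backed `allocate` closure:

```rust
use core::alloc::Layout;
use core::ptr::NonNull;
use eonix_percpu::PercpuArea;

fn init_percpu(cpuid: usize, allocate: impl FnOnce(Layout) -> NonNull<u8>) {
    // Copy the per-CPU template section into freshly allocated memory.
    let mut area = PercpuArea::new(allocate);

    // Arch-specific setup, e.g. pointing the per-CPU base register at `data`.
    area.setup(|data| {
        let _ = data;
    });

    // Publish the area so that `get_for(cpuid)` works from any CPU.
    area.register(cpuid);
}
```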

+ 41 - 10
crates/eonix_preempt/src/lib.rs

@@ -1,6 +1,16 @@
 #![no_std]
 
-use core::sync::atomic::{compiler_fence, Ordering};
+use core::{
+    ops::{Deref, DerefMut},
+    sync::atomic::{compiler_fence, Ordering},
+};
+
+pub struct PreemptGuard<T>
+where
+    T: ?Sized,
+{
+    value: T,
+}
 
 #[eonix_percpu::define_percpu]
 static PREEMPT_COUNT: usize = 0;
@@ -66,17 +76,38 @@ macro_rules! assert_preempt_count_eq {
     }};
 }
 
-#[unsafe(no_mangle)]
-pub extern "C" fn r_preempt_disable() {
-    disable();
+impl<T> PreemptGuard<T> {
+    pub fn new(value: T) -> Self {
+        disable();
+        Self { value }
+    }
+}
+
+impl<T> Deref for PreemptGuard<T>
+where
+    T: ?Sized,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
 }
 
-#[unsafe(no_mangle)]
-pub extern "C" fn r_preempt_enable() {
-    enable();
+impl<T> DerefMut for PreemptGuard<T>
+where
+    T: ?Sized,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.value
+    }
 }
 
-#[unsafe(no_mangle)]
-pub extern "C" fn r_preempt_count() -> usize {
-    count()
+impl<T> Drop for PreemptGuard<T>
+where
+    T: ?Sized,
+{
+    fn drop(&mut self) {
+        enable();
+    }
 }

+ 1 - 0
crates/eonix_runtime/src/scheduler.rs

@@ -92,6 +92,7 @@ impl Scheduler {
             let context: &mut ExecutionContext = LOCAL_SCHEDULER_CONTEXT.as_mut();
             context.set_ip(local_scheduler as _);
             context.set_sp(stack.get_bottom().addr().get() as usize);
+            context.set_interrupt(true);
             eonix_preempt::enable();
         }
 

+ 6 - 7
crates/eonix_sync/Cargo.toml

@@ -1,13 +1,12 @@
+[workspace]
+members = ["eonix_spin", "eonix_sync_rt", "eonix_sync_base"]
+
 [package]
 name = "eonix_sync"
 version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-arch = { path = "../../arch" }
-eonix_preempt = { path = "../eonix_preempt" }
-intrusive-collections = "0.9.7"
-
-[features]
-default = []
-no_check_locked = []
+eonix_spin = { path = "./eonix_spin" }
+eonix_sync_base = { path = "./eonix_sync_base" }
+eonix_sync_rt = { path = "./eonix_sync_rt" }

+ 8 - 0
crates/eonix_sync/eonix_spin/Cargo.toml

@@ -0,0 +1,8 @@
+[package]
+name = "eonix_spin"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+eonix_preempt = { path = "../../eonix_preempt" }
+eonix_sync_base = { path = "../eonix_sync_base" }

+ 1 - 1
crates/eonix_sync/src/spin/guard.rs → crates/eonix_sync/eonix_spin/src/guard.rs

@@ -1,12 +1,12 @@
 use super::{
     ContextUnlock, DisablePreemption, Relax, Spin, SpinContext, SpinRelax, UnlockedContext,
 };
-use crate::{marker::NotSend, UnlockableGuard, UnlockedGuard};
 use core::{
     marker::PhantomData,
     mem::ManuallyDrop,
     ops::{Deref, DerefMut},
 };
+use eonix_sync_base::{NotSend, UnlockableGuard, UnlockedGuard};
 
 pub struct SpinGuard<'a, T, C = DisablePreemption, R = SpinRelax>
 where

+ 11 - 4
crates/eonix_sync/src/spin.rs → crates/eonix_sync/eonix_spin/src/lib.rs

@@ -1,16 +1,15 @@
+#![no_std]
+
 mod guard;
-mod relax;
-mod spin_irq;
 
 use core::{
     cell::UnsafeCell,
     marker::PhantomData,
     sync::atomic::{AtomicBool, Ordering},
 };
+use eonix_sync_base::{Relax, SpinRelax};
 
 pub use guard::{SpinGuard, UnlockedSpinGuard};
-pub use relax::{LoopRelax, Relax, SpinRelax};
-pub use spin_irq::SpinIrq;
 
 pub trait SpinContext {
     fn save() -> Self;
@@ -57,6 +56,14 @@ where
             _phantom: PhantomData,
         }
     }
+
+    pub fn into_inner(mut self) -> T {
+        assert!(
+            !*self.locked.get_mut(),
+            "Spin::take(): Cannot take a locked Spin"
+        );
+        self.value.into_inner()
+    }
 }
 
 impl<T, R> Spin<T, R>

+ 10 - 0
crates/eonix_sync/eonix_sync_base/Cargo.toml

@@ -0,0 +1,10 @@
+[package]
+name = "eonix_sync_base"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+
+[features]
+default = []
+no_check_locked = []

+ 0 - 0
crates/eonix_sync/src/guard.rs → crates/eonix_sync/eonix_sync_base/src/guard.rs


+ 0 - 0
crates/eonix_sync/src/lazy_lock.rs → crates/eonix_sync/eonix_sync_base/src/lazy_lock.rs


+ 13 - 0
crates/eonix_sync/eonix_sync_base/src/lib.rs

@@ -0,0 +1,13 @@
+#![no_std]
+
+mod guard;
+mod lazy_lock;
+mod locked;
+mod marker;
+mod relax;
+
+pub use guard::{UnlockableGuard, UnlockedGuard};
+pub use lazy_lock::LazyLock;
+pub use locked::{AsProof, AsProofMut, Locked, Proof, ProofMut};
+pub use marker::{NotSend, NotSync};
+pub use relax::{LoopRelax, Relax, SpinRelax};

+ 0 - 0
crates/eonix_sync/src/locked.rs → crates/eonix_sync/eonix_sync_base/src/locked.rs


+ 0 - 0
crates/eonix_sync/src/locked/proof.rs → crates/eonix_sync/eonix_sync_base/src/locked/proof.rs


+ 0 - 0
crates/eonix_sync/src/marker.rs → crates/eonix_sync/eonix_sync_base/src/marker.rs


+ 0 - 0
crates/eonix_sync/src/spin/relax.rs → crates/eonix_sync/eonix_sync_base/src/relax.rs


+ 13 - 0
crates/eonix_sync/eonix_sync_rt/Cargo.toml

@@ -0,0 +1,13 @@
+[package]
+name = "eonix_sync_rt"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+arch = { path = "../../../arch" }
+eonix_hal = { path = "../../eonix_hal" }
+eonix_preempt = { path = "../../eonix_preempt" }
+eonix_spin = { path = "../eonix_spin" }
+eonix_sync_base = { path = "../eonix_sync_base" }
+
+intrusive-collections = "0.9.7"

+ 11 - 0
crates/eonix_sync/eonix_sync_rt/src/lib.rs

@@ -0,0 +1,11 @@
+#![no_std]
+
+mod mutex;
+mod rwlock;
+mod spin_irq;
+mod wait_list;
+
+pub use mutex::{Mutex, MutexGuard};
+pub use rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard};
+pub use spin_irq::SpinIrq;
+pub use wait_list::{WaitHandle, WaitList};

+ 0 - 0
crates/eonix_sync/src/mutex.rs → crates/eonix_sync/eonix_sync_rt/src/mutex.rs


+ 1 - 1
crates/eonix_sync/src/mutex/guard.rs → crates/eonix_sync/eonix_sync_rt/src/mutex/guard.rs

@@ -1,9 +1,9 @@
 use super::Mutex;
-use crate::{UnlockableGuard, UnlockedGuard};
 use core::{
     ops::{Deref, DerefMut},
     sync::atomic::Ordering,
 };
+use eonix_sync_base::{UnlockableGuard, UnlockedGuard};
 
 pub struct MutexGuard<'a, T>
 where

+ 0 - 0
crates/eonix_sync/src/rwlock.rs → crates/eonix_sync/eonix_sync_rt/src/rwlock.rs


+ 1 - 1
crates/eonix_sync/src/rwlock/guard.rs → crates/eonix_sync/eonix_sync_rt/src/rwlock/guard.rs

@@ -1,6 +1,6 @@
 use super::RwLock;
-use crate::{AsProof, AsProofMut, Proof, ProofMut, UnlockableGuard, UnlockedGuard};
 use core::ops::{Deref, DerefMut};
+use eonix_sync_base::{AsProof, AsProofMut, Proof, ProofMut, UnlockableGuard, UnlockedGuard};
 
 pub struct RwLockWriteGuard<'a, T>
 where

+ 7 - 4
crates/eonix_sync/src/spin/spin_irq.rs → crates/eonix_sync/eonix_sync_rt/src/spin_irq.rs

@@ -1,8 +1,11 @@
-use super::{ContextUnlock, Relax, Spin, SpinContext, SpinGuard, UnlockedContext};
+use eonix_hal::traits::trap::IrqState as _;
+use eonix_hal::trap::{disable_irqs_save, IrqState};
+use eonix_spin::{ContextUnlock, Spin, SpinContext, SpinGuard, UnlockedContext};
+use eonix_sync_base::Relax;
 
-pub struct IrqContext(arch::IrqState);
+pub struct IrqContext(IrqState);
 
-pub struct UnlockedIrqContext(arch::IrqState);
+pub struct UnlockedIrqContext(IrqState);
 
 pub trait SpinIrq {
     type Value: ?Sized;
@@ -14,7 +17,7 @@ pub trait SpinIrq {
 
 impl SpinContext for IrqContext {
     fn save() -> Self {
-        IrqContext(arch::disable_irqs_save())
+        IrqContext(disable_irqs_save())
     }
 
     fn restore(self) {

+ 3 - 1
crates/eonix_sync/src/wait_list.rs → crates/eonix_sync/eonix_sync_rt/src/wait_list.rs

@@ -1,8 +1,10 @@
 mod wait_handle;
 mod wait_object;
 
-use crate::{LazyLock, Spin, SpinIrq as _};
+use crate::SpinIrq as _;
 use core::fmt;
+use eonix_spin::Spin;
+use eonix_sync_base::LazyLock;
 use intrusive_collections::{linked_list::CursorMut, LinkedList};
 use wait_object::{WaitObject, WaitObjectAdapter};
 

+ 0 - 0
crates/eonix_sync/src/wait_list/wait_handle.rs → crates/eonix_sync/eonix_sync_rt/src/wait_list/wait_handle.rs


+ 2 - 1
crates/eonix_sync/src/wait_list/wait_object.rs → crates/eonix_sync/eonix_sync_rt/src/wait_list/wait_object.rs

@@ -1,5 +1,5 @@
 use super::WaitList;
-use crate::{Spin, SpinIrq as _};
+use crate::SpinIrq as _;
 use core::{
     cell::UnsafeCell,
     marker::PhantomPinned,
@@ -8,6 +8,7 @@ use core::{
     sync::atomic::{AtomicBool, AtomicPtr, Ordering},
     task::Waker,
 };
+use eonix_spin::Spin;
 use intrusive_collections::{intrusive_adapter, LinkedListAtomicLink, UnsafeRef};
 
 intrusive_adapter!(

+ 3 - 21
crates/eonix_sync/src/lib.rs

@@ -1,23 +1,5 @@
 #![no_std]
 
-mod guard;
-mod lazy_lock;
-mod locked;
-pub mod marker;
-mod mutex;
-mod rwlock;
-mod spin;
-mod wait_list;
-
-pub use guard::{UnlockableGuard, UnlockedGuard};
-pub use lazy_lock::LazyLock;
-pub use locked::{AsProof, AsProofMut, Locked, Proof, ProofMut};
-pub use mutex::{Mutex, MutexGuard};
-pub use rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard};
-pub use spin::{
-    ContextUnlock, DisablePreemption, LoopRelax, NoContext, Relax, Spin, SpinContext, SpinGuard,
-    SpinIrq, SpinRelax, UnlockedContext, UnlockedSpinGuard,
-};
-pub use wait_list::WaitList;
-
-extern crate alloc;
+pub use eonix_spin::*;
+pub use eonix_sync_base::*;
+pub use eonix_sync_rt::*;

+ 10 - 12
doc/mem_layout.txt

@@ -1,19 +1,17 @@
 physical memory
 
-0x0000 - 0x1000 : Some early kernel data
-0x1000 - 0x2000 : kernel space PML4
-0x2000 - 0x3000 : kernel PDPT for physical memory mappings
-0x3000 - 0x4000 : kernel PDPT for kernel space
-0x4000 - 0x5000 : kernel PD for kernel image
-0x5000 - 0x6000 : kernel PT for kernel image
-0x6000 - 0x7000 : kernel PD for struct page array#1
-0x7000 - 0x8000 : kernel stage1
+0x0000   - 0x1000   : Some early kernel data
+0x1000   - 0x2000   : kernel space PML4
+0x2000   - 0x3000   : kernel PDPT for physical memory mappings
+0x3000   - 0x4000   : kernel PDPT for kernel space
+0x4000   - 0x5000   : kernel PD for kernel image
+0x5000   - 0x6000   : kernel PT for kernel image
+0x6000   - 0x7000   : kernel stage1
 
-0x100000 - 0x200000 : unused
-0x200000 - 0x400000 : first kernel bss page (2MB)
-0x400000 - ?        : kernel image
-?        - ?        : struct page array
+0x7000   - 0x80000  : unused
 
+0x100000 - 0x200000 : unused
+0x200000 - ?        : kernel image
 
 virtual address space
 

+ 0 - 4
src/kernel.rs

@@ -1,7 +1,6 @@
 pub mod block;
 pub mod console;
 pub mod constants;
-pub mod cpu;
 pub mod interrupt;
 pub mod mem;
 pub mod pcie;
@@ -11,9 +10,6 @@ pub mod timer;
 pub mod user;
 pub mod vfs;
 
-#[cfg(feature = "smp")]
-pub mod smp;
-
 mod chardev;
 mod terminal;
 

+ 0 - 104
src/kernel/async/lock.cc

@@ -1,104 +0,0 @@
-#include <assert.h>
-
-#include <kernel/async/lock.hpp>
-
-namespace kernel::async {
-
-static inline void _raw_spin_lock(spinlock_t* lock_addr) {
-    asm volatile(
-        "%=:\n\t\
-         mov $1, %%eax\n\t\
-         xchg %%eax, (%0)\n\t\
-         cmp $0, %%eax\n\t\
-         jne %=b\n\t\
-        "
-        :
-        : "r"(lock_addr)
-        : "eax", "memory");
-}
-
-static inline void _raw_spin_unlock(spinlock_t* lock_addr) {
-    asm volatile(
-        "mov $0, %%eax\n\
-         xchg %%eax, (%0)"
-        :
-        : "r"(lock_addr)
-        : "eax", "memory");
-}
-
-static inline lock_context_t _save_interrupt_state() {
-    lock_context_t retval;
-    asm volatile(
-        "pushf\n\t"
-        "pop %0\n\t"
-        "cli"
-        : "=g"(retval)
-        :
-        :);
-
-    return retval;
-}
-
-static inline void _restore_interrupt_state(lock_context_t context) {
-    asm volatile(
-        "push %0\n\t"
-        "popf"
-        :
-        : "g"(context)
-        :);
-}
-
-extern "C" void r_preempt_disable();
-extern "C" void r_preempt_enable();
-extern "C" unsigned long r_preempt_count();
-
-void preempt_disable() {
-    r_preempt_disable();
-}
-
-void preempt_enable() {
-    r_preempt_enable();
-}
-
-unsigned long preempt_count() {
-    return r_preempt_count();
-}
-
-void spin_lock(spinlock_t& lock) {
-    preempt_disable();
-    _raw_spin_lock(&lock);
-}
-
-void spin_unlock(spinlock_t& lock) {
-    _raw_spin_unlock(&lock);
-    preempt_enable();
-}
-
-lock_context_t spin_lock_irqsave(spinlock_t& lock) {
-    auto state = _save_interrupt_state();
-    preempt_disable();
-
-    _raw_spin_lock(&lock);
-
-    return state;
-}
-
-void spin_unlock_irqrestore(spinlock_t& lock, lock_context_t state) {
-    _raw_spin_unlock(&lock);
-    preempt_enable();
-    _restore_interrupt_state(state);
-}
-
-mutex::~mutex() {
-    assert(m_lock == 0);
-}
-
-lock_context_t mutex::lock_irq() {
-    return spin_lock_irqsave(m_lock);
-}
-
-void mutex::unlock_irq(lock_context_t state) {
-    spin_unlock_irqrestore(m_lock, state);
-}
-
-} // namespace kernel::async

+ 0 - 46
src/kernel/cpu.rs

@@ -1,46 +0,0 @@
-use super::mem::{AsMemoryBlock, GlobalPageAlloc};
-use arch::{PercpuArea, CPU};
-use core::{alloc::Layout, pin::Pin, ptr::NonNull};
-use eonix_hal::mm::PAGE_SIZE;
-use eonix_mm::paging::Page;
-use eonix_sync::LazyLock;
-
-#[eonix_percpu::define_percpu]
-static CPU: LazyLock<CPU> =
-    LazyLock::new(|| CPU::new(unsafe { eonix_hal::trap::TRAP_STUBS_START }));
-
-/// # Safety
-/// This function is unsafe because it needs preemption to be disabled.
-pub unsafe fn local_cpu() -> Pin<&'static mut CPU> {
-    // SAFETY: `CPU_STATUS` is global static and initialized only once.
-    unsafe { Pin::new_unchecked(CPU.as_mut().get_mut()) }
-}
-
-pub fn percpu_allocate(layout: Layout) -> NonNull<u8> {
-    // TODO: Use page size defined in `arch`.
-    let page_count = layout.size().div_ceil(PAGE_SIZE);
-    let page = Page::alloc_at_least_in(page_count, GlobalPageAlloc::early_alloc());
-    let page_data = page.as_memblk().as_byte_ptr();
-    core::mem::forget(page);
-
-    page_data
-}
-
-pub fn init_localcpu() {
-    let percpu_area = PercpuArea::new(percpu_allocate);
-
-    // Preemption count is percpu. So we need to initialize percpu area first.
-    percpu_area.setup();
-
-    eonix_preempt::disable();
-
-    // SAFETY: Preemption is disabled.
-    let mut cpu = unsafe { local_cpu() };
-
-    unsafe {
-        cpu.as_mut().init();
-    }
-    percpu_area.register(cpu.cpuid());
-
-    eonix_preempt::enable();
-}

+ 5 - 27
src/kernel/interrupt.rs

@@ -1,9 +1,9 @@
-use super::cpu::local_cpu;
 use super::mem::handle_kernel_page_fault;
 use super::timer::timer_interrupt;
 use crate::kernel::constants::EINVAL;
 use crate::{driver::Port8, prelude::*};
 use alloc::sync::Arc;
+use eonix_hal::processor::CPU;
 use eonix_hal::traits::fault::Fault;
 use eonix_hal::traits::trap::{RawTrapContext, TrapType};
 use eonix_hal::trap::TrapContext;
@@ -11,11 +11,6 @@ use eonix_mm::address::{Addr as _, VAddr};
 use eonix_runtime::scheduler::Scheduler;
 use eonix_sync::SpinIrq as _;
 
-const PIC1_COMMAND: Port8 = Port8::new(0x20);
-const PIC1_DATA: Port8 = Port8::new(0x21);
-const PIC2_COMMAND: Port8 = Port8::new(0xA0);
-const PIC2_DATA: Port8 = Port8::new(0xA1);
-
 static IRQ_HANDLERS: Spin<[Option<Arc<dyn Fn() + Send + Sync>>; 16]> =
     Spin::new([const { None }; 16]);
 
@@ -27,6 +22,9 @@ pub fn default_irq_handler(irqno: usize) {
         handler();
     }
 
+    const PIC1_COMMAND: Port8 = Port8::new(0x20);
+    const PIC2_COMMAND: Port8 = Port8::new(0xA0);
+
     PIC1_COMMAND.write(0x20); // EOI
     if irqno >= 8 {
         PIC2_COMMAND.write(0x20); // EOI
@@ -82,26 +80,6 @@ where
     Ok(())
 }
 
-pub fn init() -> KResult<()> {
-    // Initialize PIC
-    PIC1_COMMAND.write(0x11); // edge trigger mode
-    PIC1_DATA.write(0x20); // IRQ 0-7 offset
-    PIC1_DATA.write(0x04); // cascade with slave PIC
-    PIC1_DATA.write(0x01); // no buffer mode
-
-    PIC2_COMMAND.write(0x11); // edge trigger mode
-    PIC2_DATA.write(0x28); // IRQ 8-15 offset
-    PIC2_DATA.write(0x02); // cascade with master PIC
-    PIC2_DATA.write(0x01); // no buffer mode
-
-    // Allow all IRQs
-    PIC1_DATA.write(0x0);
-    PIC2_DATA.write(0x0);
-
-    Ok(())
-}
-
 pub fn end_of_interrupt() {
-    // SAFETY: We only use this function in irq context, where preemption is disabled.
-    unsafe { local_cpu() }.interrupt.end_of_interrupt();
+    CPU::local().as_mut().end_of_interrupt();
 }

+ 1 - 1
src/kernel/mem.rs

@@ -7,7 +7,7 @@ mod mm_area;
 mod mm_list;
 mod page_alloc;
 
-pub use access::{AsMemoryBlock, KernelPageAccess, MemoryBlock, PhysAccess};
+pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};

+ 4 - 21
src/kernel/mem/access.rs

@@ -1,6 +1,7 @@
 use core::{num::NonZero, ptr::NonNull};
-use eonix_mm::address::{Addr as _, PAddr, VAddr};
-use eonix_mm::paging::{PageAccess, PageBlock, PFN};
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_mm::address::{Addr as _, PAddr, PhysAccess as _PhysAccess, VAddr};
+use eonix_mm::paging::PFN;
 
 use super::page_alloc::RawPagePtr;
 
@@ -15,8 +16,6 @@ pub struct MemoryBlock {
     len: usize,
 }
 
-pub struct KernelPageAccess;
-
 pub trait AsMemoryBlock {
     /// Translate the physical page the page object pointing to into kernel
     /// accessible pointer. Use it with care.
@@ -139,23 +138,7 @@ impl MemoryBlock {
 
 impl PhysAccess for PAddr {
     unsafe fn as_ptr<T>(&self) -> NonNull<T> {
-        let alignment: usize = align_of::<T>();
-        assert!(self.addr() % alignment == 0, "Alignment error");
-
-        unsafe {
-            // SAFETY: We can assume that we'll never have `self.addr()` equals
-            //         to `-PHYS_OFFSET`. Otherwise, the kernel might be broken.
-            NonNull::new_unchecked((PHYS_OFFSET + self.addr()) as *mut T)
-        }
-    }
-}
-
-impl PageAccess for KernelPageAccess {
-    unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull<PageBlock> {
-        unsafe {
-            // SAFETY: The physical address of a page must be aligned to the page size.
-            PAddr::from(pfn).as_ptr()
-        }
+        ArchPhysAccess::as_ptr(*self)
     }
 }
 

+ 13 - 16
src/kernel/mem/mm_list.rs

@@ -1,19 +1,18 @@
 mod mapping;
 mod page_fault;
 
-use super::access::KernelPageAccess;
 use super::address::{VAddrExt as _, VRangeExt as _};
 use super::page_alloc::GlobalPageAlloc;
-use super::paging::{AllocZeroed as _, PageUnmanaged};
+use super::paging::AllocZeroed as _;
 use super::{AsMemoryBlock, MMArea, Page};
 use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM};
 use crate::{prelude::*, sync::ArcSwap};
 use alloc::collections::btree_set::BTreeSet;
 use core::fmt;
 use core::sync::atomic::{AtomicUsize, Ordering};
-use eonix_hal::mm::DefaultPagingMode;
+use eonix_hal::mm::{ArchPagingMode, ArchPhysAccess, GLOBAL_PAGE_TABLE};
 use eonix_mm::address::{Addr as _, PAddr};
-use eonix_mm::page_table::{PageAttribute, PagingMode};
+use eonix_mm::page_table::PageAttribute;
 use eonix_mm::paging::PFN;
 use eonix_mm::{
     address::{AddrOps as _, VAddr, VRange},
@@ -27,10 +26,6 @@ pub use mapping::{FileMapping, Mapping};
 pub use page_fault::handle_kernel_page_fault;
 
 pub static EMPTY_PAGE: LazyLock<Page> = LazyLock::new(|| Page::zeroed());
-static KERNEL_ROOT_TABLE_PAGE: LazyLock<PageUnmanaged> = LazyLock::new(|| unsafe {
-    // SAFETY: The kernel page table is always valid.
-    PageUnmanaged::from_raw_unchecked(DefaultPagingMode::KERNEL_ROOT_TABLE_PFN)
-});
 
 #[derive(Debug, Clone, Copy)]
 pub struct Permission {
@@ -38,9 +33,11 @@ pub struct Permission {
     pub execute: bool,
 }
 
+pub type KernelPageTable<'a> = PageTable<'a, ArchPagingMode, GlobalPageAlloc, ArchPhysAccess>;
+
 struct MMListInner<'a> {
     areas: BTreeSet<MMArea>,
-    page_table: PageTable<'a, DefaultPagingMode, GlobalPageAlloc, KernelPageAccess>,
+    page_table: KernelPageTable<'a>,
     break_start: Option<VRange>,
     break_pos: Option<VAddr>,
 }
@@ -226,7 +223,7 @@ impl MMList {
     }
 
     pub fn new() -> Self {
-        let page_table = PageTable::new(&KERNEL_ROOT_TABLE_PAGE);
+        let page_table = GLOBAL_PAGE_TABLE.clone_global();
         Self {
             root_page_table: AtomicUsize::from(page_table.addr().addr()),
             user_count: AtomicUsize::new(0),
@@ -241,9 +238,9 @@ impl MMList {
 
     pub async fn new_cloned(&self) -> Self {
         let inner = self.inner.borrow();
-        let inner = inner.lock().await;
+        let mut inner = inner.lock().await;
 
-        let page_table = PageTable::new(&KERNEL_ROOT_TABLE_PAGE);
+        let page_table = GLOBAL_PAGE_TABLE.clone_global();
         let list = Self {
             root_page_table: AtomicUsize::from(page_table.addr().addr()),
             user_count: AtomicUsize::new(0),
@@ -262,7 +259,7 @@ impl MMList {
             for area in list_inner.areas.iter() {
                 list_inner
                     .page_table
-                    .set_copy_on_write(&inner.page_table, area.range());
+                    .set_copy_on_write(&mut inner.page_table, area.range());
             }
         }
 
@@ -281,7 +278,7 @@ impl MMList {
     }
 
     pub fn deactivate(&self) {
-        arch::set_root_page_table_pfn(DefaultPagingMode::KERNEL_ROOT_TABLE_PFN);
+        arch::set_root_page_table_pfn(PFN::from(GLOBAL_PAGE_TABLE.addr()));
 
         let old_user_count = self.user_count.fetch_sub(1, Ordering::Release);
         assert_ne!(old_user_count, 0);
@@ -333,7 +330,7 @@ impl MMList {
 
         let new_root_page_table = match &new {
             Some(new_mm) => new_mm.root_page_table.load(Ordering::Relaxed),
-            None => PAddr::from(DefaultPagingMode::KERNEL_ROOT_TABLE_PFN).addr(),
+            None => GLOBAL_PAGE_TABLE.addr().addr(),
         };
 
         arch::set_root_page_table_pfn(PFN::from(PAddr::from(new_root_page_table)));
@@ -551,7 +548,7 @@ trait PageTableExt {
     fn set_copy_on_write(&self, from: &Self, range: VRange);
 }
 
-impl PageTableExt for PageTable<'_, DefaultPagingMode, GlobalPageAlloc, KernelPageAccess> {
+impl PageTableExt for KernelPageTable<'_> {
     fn set_anonymous(&self, range: VRange, permission: Permission) {
         for pte in self.iter_user(range) {
             pte.set_anonymous(permission.execute);

+ 0 - 25
src/kernel/mem/page_alloc.rs

@@ -26,11 +26,6 @@ pub struct GlobalPageAlloc;
 #[derive(Clone)]
 pub struct BuddyPageAlloc();
 
-/// Allocator that allocates pages from the buddy allocator while we are still in
-/// the early stage of the kernel when the preemption is both disabled and not functioning.
-#[derive(Clone)]
-pub struct EarlyPageAlloc();
-
 struct PerCpuPageAlloc {
     batch: u32,
     // TODO: might be used in the future.
@@ -94,10 +89,6 @@ impl GlobalPageAlloc {
         BuddyPageAlloc()
     }
 
-    pub const fn early_alloc() -> EarlyPageAlloc {
-        EarlyPageAlloc()
-    }
-
     pub fn mark_present(range: PRange) {
         let mut pfn = PFN::from(range.start().ceil());
         let end_pfn = PFN::from(range.end().floor());
@@ -179,19 +170,3 @@ impl PageAlloc for BuddyPageAlloc {
         BuddyAllocator::has_management_over(page_ptr)
     }
 }
-
-impl PageAlloc for EarlyPageAlloc {
-    type RawPage = RawPagePtr;
-
-    fn alloc_order(&self, order: u32) -> Option<Self::RawPage> {
-        BUDDY_ALLOC.lock_with_context(NoContext).alloc_order(order)
-    }
-
-    unsafe fn dealloc(&self, raw_page: Self::RawPage) {
-        BUDDY_ALLOC.lock_with_context(NoContext).dealloc(raw_page);
-    }
-
-    fn has_management_over(&self, page_ptr: Self::RawPage) -> bool {
-        BuddyAllocator::has_management_over(page_ptr)
-    }
-}

+ 1 - 2
src/kernel/mem/paging.rs

@@ -1,8 +1,7 @@
 use super::{access::AsMemoryBlock, page_alloc::GlobalPageAlloc, MemoryBlock, PhysAccess};
 use crate::io::{Buffer, FillResult};
-use eonix_mm::paging::{NoAlloc, Page as GenericPage, PageAlloc};
+use eonix_mm::paging::{Page as GenericPage, PageAlloc};
 
-pub type PageUnmanaged = GenericPage<NoAlloc>;
 pub type Page = GenericPage<GlobalPageAlloc>;
 
 /// A buffer that wraps a page and provides a `Buffer` interface.

+ 0 - 43
src/kernel/smp.rs

@@ -1,43 +0,0 @@
-use super::cpu::init_localcpu;
-use crate::{
-    kernel::{cpu::local_cpu, mem::paging::Page, task::KernelStack},
-    println_debug,
-};
-use arch::define_smp_bootstrap;
-use eonix_mm::address::Addr as _;
-use eonix_runtime::scheduler::Scheduler;
-
-define_smp_bootstrap!(4, ap_entry, {
-    let page = Page::alloc_order(9);
-    let stack_bottom = page.range().end();
-    core::mem::forget(page);
-
-    // Physical address is used for init state APs.
-    stack_bottom.addr() as u64
-});
-
-unsafe extern "C" fn ap_entry() -> ! {
-    init_localcpu();
-
-    Scheduler::init_local_scheduler::<KernelStack>();
-    println_debug!("AP{} started", local_cpu().cpuid());
-
-    eonix_preempt::disable();
-    arch::enable_irqs();
-
-    // TODO!!!!!: Free the stack after having switched to idle task.
-    unsafe {
-        // SAFETY: `preempt::count()` == 1.
-        Scheduler::goto_scheduler_noreturn()
-    }
-}
-
-pub fn bootstrap_smp() {
-    eonix_preempt::disable();
-    unsafe {
-        // SAFETY: Preemption is disabled.
-        local_cpu().bootstrap_cpus();
-        wait_cpus_online();
-    }
-    eonix_preempt::enable();
-}

+ 9 - 7
src/kernel/task/thread.rs

@@ -4,7 +4,6 @@ use super::{
 };
 use crate::{
     kernel::{
-        cpu::local_cpu,
         interrupt::default_irq_handler,
         syscall::{syscall_handlers, SyscallHandler},
         timer::timer_interrupt,
@@ -14,7 +13,7 @@ use crate::{
     prelude::*,
 };
 use alloc::sync::Arc;
-use arch::{FpuState, UserTLS};
+use arch::FpuState;
 use atomic_unique_refcell::AtomicUniqueRefCell;
 use core::{
     future::Future,
@@ -23,13 +22,15 @@ use core::{
     sync::atomic::{AtomicBool, Ordering},
     task::{Context, Poll, Waker},
 };
+use eonix_hal::traits::trap::IrqState;
 use eonix_hal::{
+    processor::{UserTLS, CPU},
     traits::{
         fault::Fault,
         fpu::RawFpuState as _,
         trap::{RawTrapContext, TrapReturn, TrapType},
     },
-    trap::TrapContext,
+    trap::{disable_irqs_save, TrapContext},
 };
 use eonix_mm::address::{Addr as _, VAddr};
 use eonix_runtime::run::{Contexted, Run, RunState};
@@ -280,8 +281,7 @@ impl Thread {
     /// to be called in a preemption disabled context.
     pub unsafe fn load_thread_area32(&self) {
         if let Some(tls) = self.inner.lock().tls.as_ref() {
-            // SAFETY: Preemption is disabled.
-            tls.load(local_cpu());
+            CPU::local().as_mut().set_tls32(tls);
         }
     }
 
@@ -402,7 +402,7 @@ impl Thread {
             type Output = F::Output;
 
             fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
-                let irq_state = arch::disable_irqs_save();
+                let irq_state = disable_irqs_save();
                 let (future, _) = unsafe {
                     // SAFETY: We construct a pinned future and `&Thread` is `Unpin`.
                     let me = self.as_mut().get_unchecked_mut();
@@ -466,7 +466,9 @@ impl<F: Future> Contexted for ThreadRunnable<F> {
         unsafe {
             let trap_ctx_ptr: *const TrapContext = &raw const *self.thread.trap_ctx.borrow();
             // SAFETY:
-            arch::load_interrupt_stack(local_cpu(), trap_ctx_ptr.add(1).addr() as u64);
+            CPU::local()
+                .as_mut()
+                .load_interrupt_stack(trap_ctx_ptr.add(1).addr() as u64);
         }
     }
 

+ 29 - 185
src/kernel_init.rs

@@ -1,139 +1,34 @@
-use crate::{
-    kernel::{
-        self,
-        cpu::init_localcpu,
-        mem::{AsMemoryBlock, GlobalPageAlloc, KernelPageAccess, RawPage},
-    },
-    kernel_init,
+use crate::kernel::mem::{GlobalPageAlloc, RawPage};
+use eonix_hal::{
+    bootstrap::BootStrapData,
+    mm::{ArchMemory, ArchPagingMode, GLOBAL_PAGE_TABLE},
+    traits::mm::Memory,
 };
-use eonix_hal::mm::DefaultPagingMode;
 use eonix_mm::{
-    address::{Addr as _, AddrOps as _, PAddr, PRange, VAddr, VRange},
+    address::{Addr as _, AddrOps as _, VAddr, VRange},
     page_table::{PageAttribute, PagingMode as _, PTE},
-    paging::{NoAlloc, Page as GenericPage, PAGE_SIZE, PFN},
+    paging::{Page as GenericPage, PAGE_SIZE, PFN},
 };
-use eonix_runtime::context::ExecutionContext;
-use eonix_sync::LazyLock;
 
-static GLOBAL_PAGE_TABLE: LazyLock<
-    eonix_mm::page_table::PageTable<DefaultPagingMode, NoAlloc, KernelPageAccess>,
-> = LazyLock::new(|| unsafe {
-    GenericPage::with_raw(
-        DefaultPagingMode::KERNEL_ROOT_TABLE_PFN,
-        |root_table_page| eonix_mm::page_table::PageTable::with_root_table(root_table_page.clone()),
-    )
-});
-
-const HUGE_PAGE_LEN: usize = 1 << 21;
-
-const P_KERNEL_BSS_START: PAddr = PAddr::from_val(0x200000);
-const P_KIMAGE_START: PAddr = PAddr::from_val(0x400000);
-
-const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000);
-const V_KERNEL_BSS_START: VAddr = VAddr::from(0xffffffffc0200000);
-const KERNEL_BSS_LEN: usize = HUGE_PAGE_LEN;
-
-#[repr(C)]
-#[derive(Copy, Clone)]
-struct E820MemMapEntry {
-    base: u64,
-    len: u64,
-    entry_type: u32,
-    acpi_attrs: u32,
-}
-
-#[repr(C)]
-#[derive(Copy, Clone)]
-struct BootLoaderData {
-    entry_count: u32,
-    entry_length: u32,
-
-    block_count_1k: u32,
-    block_count_64k: u32,
-
-    all_entries: [E820MemMapEntry; 42],
-}
-
-impl E820MemMapEntry {
-    const ENTRY_FREE: u32 = 1;
-    // const ENTRY_USED: u32 = 2;
-
-    fn is_free(&self) -> bool {
-        self.entry_type == Self::ENTRY_FREE
-    }
-
-    // fn is_used(&self) -> bool {
-    //     self.entry_type == Self::ENTRY_USED
-    // }
-
-    fn range(&self) -> PRange {
-        PRange::from(PAddr::from(self.base as usize)).grow(self.len as usize)
-    }
-}
-
-impl BootLoaderData {
-    // fn memory_size(&self) -> usize {
-    //     // The initial 1M is not counted in the E820 map. We add them to the total as well.
-    //     ((self.block_count_1k + 64 * self.block_count_64k) * 1024 + 1 * 1024 * 1024) as usize
-    // }
-
-    fn entries(&self) -> &[E820MemMapEntry] {
-        &self.all_entries[..self.entry_count as usize]
-    }
-
-    fn free_entries(&self) -> impl Iterator<Item = &E820MemMapEntry> {
-        self.entries().iter().filter(|entry| entry.is_free())
-    }
-}
-
-#[no_mangle]
-pub(self) extern "C" fn _kernel_init(bootloader_data: &mut BootLoaderData) -> ! {
-    // Map kernel BSS
-    for pte in GLOBAL_PAGE_TABLE.iter_kernel_levels(
-        VRange::from(V_KERNEL_BSS_START).grow(KERNEL_BSS_LEN),
-        &DefaultPagingMode::LEVELS[..3],
-    ) {
-        let attr = PageAttribute::PRESENT
-            | PageAttribute::WRITE
-            | PageAttribute::READ
-            | PageAttribute::HUGE
-            | PageAttribute::GLOBAL;
-
-        pte.set(PFN::from(P_KERNEL_BSS_START), attr.into());
-    }
-
-    unsafe {
-        // SAFETY: We've just mapped the area with sufficient length.
-        core::ptr::write_bytes(V_KERNEL_BSS_START.addr() as *mut (), 0, KERNEL_BSS_LEN);
-    }
-
-    let addr_max = bootloader_data
-        .free_entries()
-        .map(|entry| entry.range().end())
+pub fn setup_memory(data: &mut BootStrapData) {
+    let addr_max = ArchMemory::present_ram()
+        .map(|range| range.end())
         .max()
         .expect("No free memory");
 
     let pfn_max = PFN::from(addr_max.ceil());
     let len_bytes_page_array = usize::from(pfn_max) * size_of::<RawPage>();
+    let count_pages = len_bytes_page_array.div_ceil(PAGE_SIZE);
 
-    let count_huge_pages = len_bytes_page_array.div_ceil(HUGE_PAGE_LEN);
-
-    extern "C" {
-        // Definition inside linker script.
-        fn KIMAGE_PAGES();
-    }
-
-    let kimage_pages = unsafe { core::mem::transmute::<_, usize>(KIMAGE_PAGES as *const ()) };
-
-    let paddr_after_kimage = P_KIMAGE_START + kimage_pages * PAGE_SIZE;
-    let paddr_after_kimage_aligned = paddr_after_kimage.ceil_to(HUGE_PAGE_LEN);
-
-    let mut paddr_free = paddr_after_kimage_aligned;
+    let alloc = data.get_alloc().unwrap();
 
     // Map kernel page array.
-    for pte in GLOBAL_PAGE_TABLE.iter_kernel_levels(
-        VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(HUGE_PAGE_LEN * count_huge_pages),
-        &DefaultPagingMode::LEVELS[..3],
+    const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000);
+
+    for pte in GLOBAL_PAGE_TABLE.iter_kernel_in(
+        VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(PAGE_SIZE * count_pages),
+        ArchPagingMode::LEVELS,
+        &alloc,
     ) {
         let attr = PageAttribute::PRESENT
             | PageAttribute::WRITE
@@ -141,9 +36,8 @@ pub(self) extern "C" fn _kernel_init(bootloader_data: &mut BootLoaderData) -> !
             | PageAttribute::HUGE
             | PageAttribute::GLOBAL;
 
-        pte.set(PFN::from(paddr_free), attr.into());
-
-        paddr_free = paddr_free + HUGE_PAGE_LEN;
+        let page = GenericPage::alloc_in(&alloc);
+        pte.set(page.into_raw(), attr.into());
     }
 
     unsafe {
@@ -151,70 +45,20 @@ pub(self) extern "C" fn _kernel_init(bootloader_data: &mut BootLoaderData) -> !
         core::ptr::write_bytes(
             V_KERNEL_PAGE_ARRAY_START.addr() as *mut (),
             0,
-            count_huge_pages * HUGE_PAGE_LEN,
+            count_pages * PAGE_SIZE,
         );
     }
 
-    let paddr_unused_start = paddr_free;
-
-    for entry in bootloader_data.free_entries() {
-        let mut range = entry.range();
-
+    for range in ArchMemory::present_ram() {
         GlobalPageAlloc::mark_present(range);
-
-        if range.end() <= paddr_unused_start {
-            continue;
-        }
-
-        if range.start() < paddr_unused_start {
-            let (_, right) = range.split_at(paddr_unused_start);
-            range = right;
-        }
-
-        unsafe {
-            // SAFETY: We are in system initialization procedure where preemption is disabled.
-            GlobalPageAlloc::add_pages(range);
-        }
-    }
-
-    unsafe {
-        // SAFETY: We are in system initialization procedure where preemption is disabled.
-        GlobalPageAlloc::add_pages(PRange::new(PAddr::from(0x100000), PAddr::from(0x200000)));
-        GlobalPageAlloc::add_pages(PRange::new(paddr_after_kimage, paddr_after_kimage_aligned));
     }
 
-    let (stack_bottom_addr, stack_pfn) = {
-        let kernel_stack_page = GenericPage::alloc_order_in(9, GlobalPageAlloc::early_alloc());
-        let stack_area = kernel_stack_page.as_memblk();
-
-        let stack_bottom_addr = stack_area
-            .addr()
-            .checked_add(stack_area.len())
-            .expect("The stack bottom should not be null");
-
-        let stack_pfn = kernel_stack_page.into_raw();
-
-        (stack_bottom_addr, stack_pfn)
-    };
-
-    let mut to_ctx = ExecutionContext::new();
-    to_ctx.set_interrupt(false);
-    to_ctx.set_sp(stack_bottom_addr.get());
-    to_ctx.call1(_init_on_new_stack, usize::from(stack_pfn));
-
-    to_ctx.switch_noreturn();
-}
-
-extern "C" fn _init_on_new_stack(early_kernel_stack_pfn: PFN) -> ! {
-    // Add the pages previously used by `_kernel_init` as a stack.
-    unsafe {
-        // SAFETY: We are in system initialization procedure where preemption is disabled.
-        GlobalPageAlloc::add_pages(PRange::new(PAddr::from(0x8000), PAddr::from(0x80000)));
+    if let Some(early_alloc) = data.take_alloc() {
+        for range in early_alloc.into_iter() {
+            unsafe {
+                // SAFETY: We are in system initialization procedure where preemption is disabled.
+                GlobalPageAlloc::add_pages(range);
+            }
+        }
     }
-
-    init_localcpu();
-
-    kernel::interrupt::init().unwrap();
-
-    kernel_init(early_kernel_stack_pfn)
 }

+ 47 - 10
src/lib.rs

@@ -22,11 +22,16 @@ mod rcu;
 mod sync;
 
 use alloc::{ffi::CString, sync::Arc};
+use core::{
+    hint::spin_loop,
+    sync::atomic::{AtomicBool, Ordering},
+};
 use elf::ParsedElf32;
-use eonix_mm::paging::PFN;
+use eonix_hal::{processor::CPU, traits::trap::IrqState, trap::disable_irqs_save};
+use eonix_mm::address::PRange;
 use eonix_runtime::{run::FutureRun, scheduler::Scheduler, task::Task};
 use kernel::{
-    mem::Page,
+    mem::GlobalPageAlloc,
     pcie::init_pcie,
     task::{new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, ThreadBuilder},
     vfs::{
@@ -36,6 +41,7 @@ use kernel::{
     },
     CharDevice,
 };
+use kernel_init::setup_memory;
 use path::Path;
 use prelude::*;
 
@@ -54,11 +60,17 @@ fn panic(info: &core::panic::PanicInfo) -> ! {
     println_fatal!();
     println_fatal!("{}", info.message());
 
-    arch::freeze()
+    loop {}
 }
 
-#[no_mangle]
-pub extern "C" fn kernel_init(early_kstack_pfn: PFN) -> ! {
+static BSP_OK: AtomicBool = AtomicBool::new(false);
+
+#[eonix_hal::main]
+fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! {
+    setup_memory(&mut data);
+
+    BSP_OK.store(true, Ordering::Release);
+
     init_pcie().expect("Unable to initialize PCIe bus");
 
     // To satisfy the `Scheduler` "preempt count == 0" assertion.
@@ -68,16 +80,43 @@ pub extern "C" fn kernel_init(early_kstack_pfn: PFN) -> ! {
     // So call `init_vfs` first, then `init_multitasking`.
     Scheduler::init_local_scheduler::<KernelStack>();
 
-    Scheduler::get().spawn::<KernelStack, _>(FutureRun::new(init_process(early_kstack_pfn)));
+    Scheduler::get().spawn::<KernelStack, _>(FutureRun::new(init_process(data.get_early_stack())));
 
+    drop(data);
     unsafe {
         // SAFETY: `preempt::count()` == 1.
         Scheduler::goto_scheduler_noreturn()
     }
 }
 
-async fn init_process(early_kstack_pfn: PFN) {
-    unsafe { Page::from_raw(early_kstack_pfn) };
+#[eonix_hal::ap_main]
+fn kernel_ap_main(_stack_range: PRange) -> ! {
+    while !BSP_OK.load(Ordering::Acquire) {
+        // Wait for BSP to finish initializing.
+        spin_loop();
+    }
+
+    Scheduler::init_local_scheduler::<KernelStack>();
+    println_debug!("AP{} started", CPU::local().cpuid());
+
+    eonix_preempt::disable();
+
+    // TODO!!!!!: Free the stack after having switched to idle task.
+    unsafe {
+        // SAFETY: `preempt::count()` == 1.
+        Scheduler::goto_scheduler_noreturn()
+    }
+}
+
+async fn init_process(early_kstack: PRange) {
+    unsafe {
+        let irq_ctx = disable_irqs_save();
+
+        // SAFETY: IRQ is disabled.
+        GlobalPageAlloc::add_pages(early_kstack);
+
+        irq_ctx.restore();
+    }
 
     CharDevice::init().unwrap();
 
@@ -91,8 +130,6 @@ async fn init_process(early_kstack_pfn: PFN) {
     fs::procfs::init();
     fs::fat32::init();
 
-    kernel::smp::bootstrap_smp();
-
     let (ip, sp, mm_list) = {
         // mount fat32 /mnt directory
         let fs_context = FsContext::global();

+ 0 - 21
src/types/libstdcpp.cpp

@@ -1,21 +0,0 @@
-#include <assert.h>
-
-#include <types/types.h>
-
-extern "C" void NORETURN __stack_chk_fail(void) {
-    for (;;)
-        ;
-}
-
-extern "C" void NORETURN __cxa_pure_virtual(void) {
-    for (;;)
-        ;
-}
-
-void NORETURN __assert_fail(const char* statement, const char* file, int line, const char* func) {
-    (void)statement, (void)file, (void)line, (void)func;
-    for (;;)
-        asm volatile(
-            "cli\n\t"
-            "hlt\n\t");
-}