
Merge branch 'mm-refactor'

greatbridf, 8 months ago
commit c2f64394c6
100 changed files with 5557 additions and 1689 deletions
  1. Cargo.lock (+30 -38)
  2. Cargo.toml (+11 -15)
  3. arch/Cargo.toml (+2 -1)
  4. arch/percpu-macros/src/arch.rs (+13 -10)
  5. arch/percpu-macros/src/lib.rs (+64 -0)
  6. arch/src/x86_64/fence.rs (+34 -0)
  7. arch/src/x86_64/init.rs (+10 -39)
  8. arch/src/x86_64/interrupt.rs (+36 -3)
  9. arch/src/x86_64/mm.rs (+219 -0)
 10. arch/src/x86_64/mod.rs (+16 -10)
 11. arch/src/x86_64/percpu.rs (+80 -10)
 12. arch/src/x86_64/user.rs (+3 -3)
 13. crates/buddy_allocator/Cargo.toml (+9 -0)
 14. crates/buddy_allocator/src/free_area.rs (+47 -0)
 15. crates/buddy_allocator/src/lib.rs (+73 -0)
 16. crates/buddy_allocator/src/zone.rs (+122 -0)
 17. crates/eonix_log/Cargo.toml (+0 -1)
 18. crates/eonix_log/src/lib.rs (+1 -4)
 19. crates/eonix_mm/Cargo.toml (+7 -0)
 20. crates/eonix_mm/src/address.rs (+14 -0)
 21. crates/eonix_mm/src/address/addr.rs (+64 -0)
 22. crates/eonix_mm/src/address/addr_range.rs (+190 -0)
 23. crates/eonix_mm/src/address/error.rs (+4 -0)
 24. crates/eonix_mm/src/address/paddr.rs (+65 -0)
 25. crates/eonix_mm/src/address/vaddr.rs (+60 -0)
 26. crates/eonix_mm/src/lib.rs (+5 -0)
 27. crates/eonix_mm/src/page_table.rs (+9 -0)
 28. crates/eonix_mm/src/page_table/page_table.rs (+132 -0)
 29. crates/eonix_mm/src/page_table/paging_mode.rs (+38 -0)
 30. crates/eonix_mm/src/page_table/pte.rs (+52 -0)
 31. crates/eonix_mm/src/page_table/pte_iterator.rs (+177 -0)
 32. crates/eonix_mm/src/paging.rs (+9 -0)
 33. crates/eonix_mm/src/paging/page.rs (+219 -0)
 34. crates/eonix_mm/src/paging/page_alloc.rs (+31 -0)
 35. crates/eonix_mm/src/paging/pfn.rs (+65 -0)
 36. crates/eonix_mm/src/paging/raw_page.rs (+97 -0)
 37. crates/eonix_percpu/Cargo.toml (+12 -0)
 38. crates/eonix_percpu/src/arch.rs (+24 -0)
 39. crates/eonix_percpu/src/lib.rs (+181 -0)
 40. crates/eonix_preempt/src/lib.rs (+15 -0)
 41. crates/eonix_runtime/Cargo.toml (+0 -1)
 42. crates/eonix_runtime/src/executor.rs (+7 -19)
 43. crates/eonix_runtime/src/executor/builder.rs (+3 -3)
 44. crates/eonix_runtime/src/executor/stack.rs (+3 -1)
 45. crates/eonix_runtime/src/ready_queue.rs (+5 -10)
 46. crates/eonix_runtime/src/run.rs (+3 -29)
 47. crates/eonix_runtime/src/run/future_run.rs (+3 -3)
 48. crates/eonix_runtime/src/scheduler.rs (+57 -82)
 49. crates/eonix_runtime/src/task.rs (+100 -71)
 50. crates/eonix_runtime/src/task/adapter.rs (+1 -2)
 51. crates/eonix_runtime/src/task/task_state.rs (+16 -9)
 52. crates/eonix_sync/Cargo.toml (+6 -0)
 53. crates/eonix_sync/src/guard.rs (+10 -153)
 54. crates/eonix_sync/src/lazy_lock.rs (+172 -0)
 55. crates/eonix_sync/src/lib.rs (+17 -7)
 56. crates/eonix_sync/src/lock.rs (+0 -139)
 57. crates/eonix_sync/src/locked.rs (+95 -0)
 58. crates/eonix_sync/src/locked/proof.rs (+182 -0)
 59. crates/eonix_sync/src/marker.rs (+12 -0)
 60. crates/eonix_sync/src/mutex.rs (+96 -0)
 61. crates/eonix_sync/src/mutex/guard.rs (+98 -0)
 62. crates/eonix_sync/src/rwlock.rs (+194 -0)
 63. crates/eonix_sync/src/rwlock/guard.rs (+192 -0)
 64. crates/eonix_sync/src/spin.rs (+89 -95)
 65. crates/eonix_sync/src/spin/guard.rs (+114 -0)
 66. crates/eonix_sync/src/spin/relax.rs (+17 -0)
 67. crates/eonix_sync/src/spin/spin_irq.rs (+124 -0)
 68. crates/eonix_sync/src/strategy.rs (+0 -45)
 69. crates/eonix_sync/src/wait_list.rs (+122 -0)
 70. crates/eonix_sync/src/wait_list/wait_handle.rs (+221 -0)
 71. crates/eonix_sync/src/wait_list/wait_object.rs (+107 -0)
 72. crates/intrusive_list/Cargo.toml (+6 -0)
 73. crates/intrusive_list/src/lib.rs (+59 -0)
 74. crates/posix_types/Cargo.toml (+6 -0)
 75. crates/posix_types/src/lib.rs (+4 -0)
 76. crates/posix_types/src/result.rs (+1 -0)
 77. crates/posix_types/src/signal.rs (+3 -0)
 78. crates/posix_types/src/signal/sig_action.rs (+106 -0)
 79. rust-toolchain (+1 -1)
 80. src/driver/ahci/command.rs (+3 -5)
 81. src/driver/ahci/command_table.rs (+48 -0)
 82. src/driver/ahci/control.rs (+11 -8)
 83. src/driver/ahci/defs.rs (+4 -1)
 84. src/driver/ahci/mod.rs (+63 -65)
 85. src/driver/ahci/port.rs (+115 -225)
 86. src/driver/ahci/register.rs (+58 -0)
 87. src/driver/ahci/slot.rs (+94 -0)
 88. src/driver/ahci/stats.rs (+46 -0)
 89. src/driver/e1000e.rs (+434 -432)
 90. src/driver/serial.rs (+25 -15)
 91. src/elf.rs (+11 -11)
 92. src/fs/fat32.rs (+18 -19)
 93. src/fs/fat32/file.rs (+10 -4)
 94. src/fs/procfs.rs (+28 -34)
 95. src/fs/tmpfs.rs (+28 -28)
 96. src/io.rs (+40 -4)
 97. src/kernel/block.rs (+13 -16)
 98. src/kernel/chardev.rs (+13 -11)
 99. src/kernel/console.rs (+1 -4)
100. src/kernel/constants.rs (+2 -3)

+ 30 - 38
Cargo.lock

@@ -16,6 +16,7 @@ name = "arch"
 version = "0.1.0"
 dependencies = [
  "cfg-if",
+ "eonix_mm",
  "percpu-macros",
 ]
 
@@ -55,6 +56,15 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
 
+[[package]]
+name = "buddy_allocator"
+version = "0.1.0"
+dependencies = [
+ "eonix_mm",
+ "eonix_sync",
+ "intrusive_list",
+]
+
 [[package]]
 name = "cexpr"
 version = "0.6.0"
@@ -92,7 +102,13 @@ name = "eonix_log"
 version = "0.1.0"
 dependencies = [
  "eonix_sync",
- "lazy_static",
+]
+
+[[package]]
+name = "eonix_mm"
+version = "0.1.0"
+dependencies = [
+ "intrusive_list",
 ]
 
 [[package]]
@@ -112,7 +128,6 @@ dependencies = [
  "eonix_preempt",
  "eonix_sync",
  "intrusive-collections",
- "lazy_static",
  "pointers",
 ]
 
@@ -120,7 +135,9 @@ dependencies = [
 name = "eonix_sync"
 version = "0.1.0"
 dependencies = [
+ "arch",
  "eonix_preempt",
+ "intrusive-collections",
 ]
 
 [[package]]
@@ -131,15 +148,16 @@ dependencies = [
  "atomic_unique_refcell",
  "bindgen",
  "bitflags",
+ "buddy_allocator",
  "eonix_log",
+ "eonix_mm",
  "eonix_preempt",
  "eonix_runtime",
  "eonix_sync",
  "intrusive-collections",
  "itertools",
- "lazy_static",
  "pointers",
- "spin",
+ "posix_types",
 ]
 
 [[package]]
@@ -157,6 +175,10 @@ dependencies = [
  "memoffset",
 ]
 
+[[package]]
+name = "intrusive_list"
+version = "0.1.0"
+
 [[package]]
 name = "itertools"
 version = "0.13.0"
@@ -166,15 +188,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "lazy_static"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
-dependencies = [
- "spin",
-]
-
 [[package]]
 name = "libc"
 version = "0.2.164"
@@ -191,16 +204,6 @@ dependencies = [
  "windows-targets",
 ]
 
-[[package]]
-name = "lock_api"
-version = "0.4.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
-dependencies = [
- "autocfg",
- "scopeguard",
-]
-
 [[package]]
 name = "log"
 version = "0.4.22"
@@ -251,6 +254,10 @@ dependencies = [
 name = "pointers"
 version = "0.1.0"
 
+[[package]]
+name = "posix_types"
+version = "0.1.0"
+
 [[package]]
 name = "prettyplease"
 version = "0.2.25"
@@ -314,27 +321,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
-[[package]]
-name = "scopeguard"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
-
 [[package]]
 name = "shlex"
 version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
-[[package]]
-name = "spin"
-version = "0.9.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
-dependencies = [
- "lock_api",
-]
-
 [[package]]
 name = "syn"
 version = "2.0.89"

+ 11 - 15
Cargo.toml

@@ -11,30 +11,24 @@ arch = { path = "./arch" }
 atomic_unique_refcell = { path = "./crates/atomic_unique_refcell", features = [
     "no_std",
 ] }
+buddy_allocator = { path = "./crates/buddy_allocator" }
+eonix_mm = { path = "./crates/eonix_mm" }
 eonix_preempt = { path = "./crates/eonix_preempt" }
 eonix_runtime = { path = "./crates/eonix_runtime" }
 eonix_sync = { path = "./crates/eonix_sync" }
 eonix_log = { path = "./crates/eonix_log" }
 pointers = { path = "./crates/pointers" }
+posix_types = { path = "./crates/posix_types" }
 
 bitflags = "2.6.0"
 intrusive-collections = "0.9.7"
 itertools = { version = "0.13.0", default-features = false }
-lazy_static = { version = "1.5.0", features = ["spin_no_std"] }
-spin = "0.9.8"
 
 [features]
-default = ["smp", "trace_future"]
-trace_condvar = []
+default = ["smp"]
 trace_syscall = []
 trace_scheduler = []
-trace_future = []
-log_trace = [
-    "trace_condvar",
-    "trace_syscall",
-    "trace_scheduler",
-    "trace_future",
-]
+log_trace = ["trace_syscall", "trace_scheduler"]
 log_debug = []
 smp = []
 
@@ -48,17 +42,19 @@ panic = "abort"
 opt-level = 0
 
 [profile.dev.package.eonix_preempt]
-opt-level = 0
+opt-level = 2
 
 [profile.dev.package.eonix_runtime]
 opt-level = 0
 
 [profile.dev.package.eonix_sync]
-opt-level = 0
+opt-level = 2
 
-[profile.dev.package."*"]
+[profile.dev.package.intrusive_list]
 opt-level = 2
-debug = false
+
+[profile.dev.package."*"]
+opt-level = "s"
 
 [profile.dev.build-override]
 opt-level = 0

+ 2 - 1
arch/Cargo.toml

@@ -4,5 +4,6 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
-percpu-macros = { path="./percpu-macros" }
+eonix_mm = { path = "../crates/eonix_mm" }
+percpu-macros = { path = "./percpu-macros" }
 cfg-if = "1.0"

+ 13 - 10
arch/percpu-macros/src/arch.rs

@@ -5,16 +5,19 @@ use syn::{Ident, Type};
 /// Get the base address for percpu variables of the current thread.
 pub fn get_percpu_pointer(percpu: &Ident, ty: &Type) -> TokenStream {
     quote! {
-        #[cfg(target_arch = "x86_64")] {
-            let base: *mut #ty;
-            ::core::arch::asm!(
-                "mov %gs:0, {address}",
-                "add ${percpu_pointer}, {address}",
-                percpu_pointer = sym #percpu,
-                address = out(reg) base,
-                options(att_syntax)
-            );
-            base
+        {
+            #[cfg(target_arch = "x86_64")]
+            {
+                let base: *mut #ty;
+                ::core::arch::asm!(
+                    "mov %gs:0, {address}",
+                    "add ${percpu_pointer}, {address}",
+                    percpu_pointer = sym #percpu,
+                    address = out(reg) base,
+                    options(att_syntax)
+                );
+                base
+            }
         }
     }
     .into()

+ 64 - 0
arch/percpu-macros/src/lib.rs

@@ -115,3 +115,67 @@ pub fn define_percpu(attrs: TokenStream, item: TokenStream) -> TokenStream {
     }
     .into()
 }
+
+#[proc_macro_attribute]
+pub fn define_percpu_shared(attrs: TokenStream, item: TokenStream) -> TokenStream {
+    if !attrs.is_empty() {
+        panic!("`define_percpu_shared` attribute does not take any arguments");
+    }
+
+    let item = parse_macro_input!(item as ItemStatic);
+    let vis = &item.vis;
+    let ident = &item.ident;
+    let ty = &item.ty;
+    let expr = &item.expr;
+
+    let inner_ident = format_ident!("_percpu_shared_inner_{}", ident);
+    let access_ident = format_ident!("_access_shared_{}", ident);
+
+    let as_ptr = arch::get_percpu_pointer(&inner_ident, &ty);
+
+    quote! {
+        #[link_section = ".percpu"]
+        #[allow(non_upper_case_globals)]
+        static #inner_ident: #ty = #expr;
+        #[allow(non_camel_case_types)]
+        #vis struct #access_ident;
+        #vis static #ident: #access_ident = #access_ident;
+
+        impl #access_ident {
+            fn as_ptr(&self) -> *const #ty {
+                unsafe { ( #as_ptr ) }
+            }
+
+            pub fn get_ref(&self) -> & #ty {
+                // SAFETY: This is safe because `as_ptr()` is guaranteed to be valid.
+                unsafe { self.as_ptr().as_ref().unwrap() }
+            }
+
+            pub fn get_for_cpu(&self, cpuid: usize) -> Option<& #ty > {
+                let offset = & #inner_ident as *const _ as usize;
+                let base = ::arch::PercpuArea::get_for(cpuid);
+                base.map(|base| unsafe { base.byte_add(offset).cast().as_ref() })
+            }
+        }
+
+        impl ::core::ops::Deref for #access_ident {
+            type Target = #ty;
+
+            fn deref(&self) -> &Self::Target {
+                self.get_ref()
+            }
+        }
+
+        impl<T> ::core::convert::AsRef<T> for #access_ident
+        where
+            <Self as ::core::ops::Deref>::Target: ::core::convert::AsRef<T>,
+        {
+            fn as_ref(&self) -> &T {
+                use ::core::ops::Deref;
+
+                self.deref().as_ref()
+            }
+        }
+    }
+    .into()
+}
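
A usage sketch for the new `define_percpu_shared` attribute (not part of the commit); the `PerCpuStats` type and counter are made up for illustration, and the macro is assumed to be reached through the `arch` re-export added in `arch/src/x86_64/mod.rs` below:

    use core::sync::atomic::{AtomicUsize, Ordering};

    // Hypothetical per-CPU statistics block; any `Sync` type with a
    // `const`-evaluable initializer works here.
    struct PerCpuStats {
        syscalls: AtomicUsize,
    }

    // Expands to a `.percpu`-section template plus the `STATS` accessor struct
    // generated by the macro body above.
    #[arch::define_percpu_shared]
    static STATS: PerCpuStats = PerCpuStats {
        syscalls: AtomicUsize::new(0),
    };

    fn on_syscall() {
        // `Deref` goes through `get_ref()`, i.e. the current CPU's copy via %gs.
        STATS.syscalls.fetch_add(1, Ordering::Relaxed);
    }

    fn read_remote(cpuid: usize) -> Option<usize> {
        // `get_for_cpu` resolves another CPU's copy through `PercpuArea::get_for`.
        STATS
            .get_for_cpu(cpuid)
            .map(|stats| stats.syscalls.load(Ordering::Relaxed))
    }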

+ 34 - 0
arch/src/x86_64/fence.rs

@@ -0,0 +1,34 @@
+use core::arch::asm;
+
+#[doc(hidden)]
+/// Issues a full memory barrier.
+///
+/// Note that this acts as a low-level operation **ONLY** and should be used with caution.
+/// **NO COMPILER BARRIERS** are emitted by this function.
+pub fn memory_barrier() {
+    unsafe {
+        asm!("mfence", options(nostack, nomem, preserves_flags));
+    }
+}
+
+#[doc(hidden)]
+/// Issues a read memory barrier.
+///
+/// Note that this acts as a low-level operation **ONLY** and should be used with caution.
+/// **NO COMPILER BARRIERS** are emitted by this function.
+pub fn read_memory_barrier() {
+    unsafe {
+        asm!("lfence", options(nostack, nomem, preserves_flags));
+    }
+}
+
+#[doc(hidden)]
+/// Issues a write memory barrier.
+///
+/// Note that this acts as a low-level operation **ONLY** and should be used with caution.
+/// **NO COMPILER BARRIERS** are emitted by this function.
+pub fn write_memory_barrier() {
+    unsafe {
+        asm!("sfence", options(nostack, nomem, preserves_flags));
+    }
+}
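
Since the doc comments stress that no compiler barrier is emitted, callers are expected to pair these fences with an explicit compiler fence; a small illustrative sketch (not part of the commit, the statics are made up):

    use core::sync::atomic::{compiler_fence, AtomicBool, AtomicUsize, Ordering};

    static DATA: AtomicUsize = AtomicUsize::new(0);
    static READY: AtomicBool = AtomicBool::new(false);

    fn publish(value: usize) {
        DATA.store(value, Ordering::Relaxed);
        // Keep the compiler from reordering across the point where the hardware
        // store fence is emitted, then publish the flag.
        compiler_fence(Ordering::SeqCst);
        arch::write_memory_barrier();
        READY.store(true, Ordering::Relaxed);
    }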

+ 10 - 39
arch/src/x86_64/init.rs

@@ -1,10 +1,5 @@
-use core::{
-    alloc::Layout,
-    pin::Pin,
-    ptr::{addr_of, NonNull},
-};
-
-use super::{enable_sse, percpu::init_percpu_area_thiscpu, GDTEntry, InterruptControl, GDT};
+use super::{enable_sse, GDTEntry, InterruptControl, GDT};
+use core::{pin::Pin, ptr::addr_of};
 
 #[repr(C)]
 #[derive(Debug, Clone, Copy)]
@@ -15,7 +10,7 @@ struct TSS_SP {
 }
 
 #[repr(C)]
-pub struct TSS {
+pub(crate) struct TSS {
     _reserved1: u32,
     rsp: [TSS_SP; 3],
     _reserved2: u32,
@@ -48,46 +43,22 @@ impl TSS {
     }
 }
 
-/// Architecture-specific per-cpu status.
-#[allow(dead_code)]
-pub struct CPUStatus {
-    id: usize,
+/// Architecture-specific cpu status data.
+pub struct CPU {
+    cpuid: usize,
     gdt: GDT,
     tss: TSS,
-
-    percpu_area: NonNull<u8>,
     pub interrupt: InterruptControl,
 }
 
-impl CPUStatus {
-    pub unsafe fn new_thiscpu<F>(allocate: F) -> Self
-    where
-        F: FnOnce(Layout) -> NonNull<u8>,
-    {
-        const PAGE_SIZE: usize = 0x1000;
-        extern "C" {
-            static PERCPU_PAGES: usize;
-            fn _PERCPU_DATA_START();
-        }
-
-        let percpu_area = allocate(Layout::from_size_align_unchecked(
-            PERCPU_PAGES * PAGE_SIZE,
-            PAGE_SIZE,
-        ));
-
-        percpu_area.copy_from_nonoverlapping(
-            NonNull::new(_PERCPU_DATA_START as *mut u8).unwrap(),
-            PERCPU_PAGES * PAGE_SIZE,
-        );
-
+impl CPU {
+    pub fn new() -> Self {
         let (interrupt_control, cpuid) = InterruptControl::new();
 
-        init_percpu_area_thiscpu(percpu_area);
         Self {
-            id: cpuid,
+            cpuid,
             gdt: GDT::new(),
             tss: TSS::new(),
-            percpu_area,
             interrupt: interrupt_control,
         }
     }
@@ -126,7 +97,7 @@ impl CPUStatus {
     }
 
     pub fn cpuid(&self) -> usize {
-        self.id
+        self.cpuid
     }
 }
 

+ 36 - 3
arch/src/x86_64/interrupt.rs

@@ -306,6 +306,9 @@ pub struct InterruptControl {
     apic_base: APICRegs,
 }
 
+/// State of the interrupt flag.
+pub struct IrqState(u64);
+
 impl InterruptContext {
     pub fn set_return_value(&mut self, value: u64) {
         // The return value is stored in rax.
@@ -423,7 +426,7 @@ impl APICRegs {
 impl InterruptControl {
     /// # Return
     /// Returns a tuple of InterruptControl and the cpu id of the current cpu.
-    pub unsafe fn new() -> (Self, usize) {
+    pub(crate) fn new() -> (Self, usize) {
         extern "C" {
             static ISR_START_ADDR: usize;
         }
@@ -491,18 +494,48 @@ impl InterruptControl {
     }
 }
 
+impl IrqState {
+    pub fn restore(self) {
+        let Self(state) = self;
+
+        unsafe {
+            asm!(
+                "push {state}",
+                "popf",
+                state = in(reg) state,
+                options(att_syntax, nomem)
+            );
+        }
+    }
+}
+
 pub fn enable_irqs() {
     unsafe {
-        asm!("sti");
+        asm!("sti", options(att_syntax, nomem, nostack));
     }
 }
 
 pub fn disable_irqs() {
     unsafe {
-        asm!("cli");
+        asm!("cli", options(att_syntax, nomem, nostack));
     }
 }
 
+pub fn disable_irqs_save() -> IrqState {
+    let state: u64;
+    unsafe {
+        asm!(
+            "pushf",
+            "pop {state}",
+            "cli",
+            state = out(reg) state,
+            options(att_syntax, nomem)
+        );
+    }
+
+    IrqState(state)
+}
+
 extern "C" {
     pub fn _arch_fork_return();
 }
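
A minimal sketch (not part of the commit) of the usual save/restore pattern the new `disable_irqs_save`/`IrqState::restore` pair enables:

    fn with_irqs_disabled<T>(f: impl FnOnce() -> T) -> T {
        // Save RFLAGS and clear IF, run the critical section, then restore the
        // saved state so nesting inside an already-disabled region stays disabled.
        let irq_state = arch::disable_irqs_save();
        let result = f();
        irq_state.restore();
        result
    }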

+ 219 - 0
arch/src/x86_64/mm.rs

@@ -0,0 +1,219 @@
+use core::{marker::PhantomData, ptr::NonNull};
+use eonix_mm::{
+    address::{Addr as _, PAddr},
+    page_table::{PageAttribute, PageTableLevel, PagingMode, RawPageTable, PTE},
+    paging::{PageBlock, PFN},
+};
+
+pub const PAGE_SIZE: usize = 0x1000;
+
+const KERNEL_PML4_PFN: PFN = PFN::from_val(0x2000 >> 12);
+
+const PA_P: u64 = 0x001;
+const PA_RW: u64 = 0x002;
+const PA_US: u64 = 0x004;
+#[allow(dead_code)]
+const PA_PWT: u64 = 0x008;
+#[allow(dead_code)]
+const PA_PCD: u64 = 0x010;
+const PA_A: u64 = 0x020;
+const PA_D: u64 = 0x040;
+#[allow(dead_code)]
+const PA_PS: u64 = 0x080;
+const PA_G: u64 = 0x100;
+const PA_COW: u64 = 0x200;
+const PA_MMAP: u64 = 0x400;
+const PA_ANON: u64 = 0x800;
+const PA_NXE: u64 = 0x8000_0000_0000_0000;
+const PA_MASK: u64 = 0xfff0_0000_0000_0fff;
+
+#[repr(transparent)]
+pub struct PTE64(u64);
+
+#[derive(Clone, Copy)]
+pub struct PageAttribute64(u64);
+
+pub struct RawPageTable4Levels<'a>(NonNull<PTE64>, PhantomData<&'a ()>);
+
+pub struct PagingMode4Levels;
+
+impl PTE for PTE64 {
+    type Attr = PageAttribute64;
+
+    fn set(&mut self, pfn: PFN, attr: Self::Attr) {
+        let paddr = PAddr::from(pfn).addr();
+
+        self.0 = (paddr as u64 & !PA_MASK) | (attr.0 & PA_MASK);
+    }
+
+    fn get(&self) -> (PFN, Self::Attr) {
+        (
+            PFN::from(PAddr::from((self.0 & !PA_MASK) as usize)),
+            PageAttribute64(self.0 & PA_MASK),
+        )
+    }
+
+    fn take(&mut self) -> (PFN, Self::Attr) {
+        let pfn_attr = self.get();
+        self.0 = 0;
+        pfn_attr
+    }
+}
+
+impl PagingMode for PagingMode4Levels {
+    type Entry = PTE64;
+    type RawTable<'a> = RawPageTable4Levels<'a>;
+
+    const LEVELS: &'static [PageTableLevel] = &[
+        PageTableLevel::new(39, 9),
+        PageTableLevel::new(30, 9),
+        PageTableLevel::new(21, 9),
+        PageTableLevel::new(12, 9),
+    ];
+
+    const KERNEL_ROOT_TABLE_PFN: PFN = KERNEL_PML4_PFN;
+}
+
+impl<'a> RawPageTable<'a> for RawPageTable4Levels<'a> {
+    type Entry = PTE64;
+
+    fn index(&self, index: u16) -> &'a Self::Entry {
+        unsafe { &self.0.cast::<[PTE64; 512]>().as_ref()[index as usize] }
+    }
+
+    fn index_mut(&mut self, index: u16) -> &'a mut Self::Entry {
+        unsafe { &mut self.0.cast::<[PTE64; 512]>().as_mut()[index as usize] }
+    }
+
+    unsafe fn from_ptr(ptr: NonNull<PageBlock>) -> Self {
+        Self(ptr.cast(), PhantomData)
+    }
+}
+
+impl PageAttribute for PageAttribute64 {
+    fn new() -> Self {
+        Self(PA_NXE)
+    }
+
+    fn present(self, present: bool) -> Self {
+        if present {
+            Self(self.0 | PA_P)
+        } else {
+            Self(self.0 & !PA_P)
+        }
+    }
+
+    fn write(self, write: bool) -> Self {
+        if write {
+            Self(self.0 | PA_RW)
+        } else {
+            Self(self.0 & !PA_RW)
+        }
+    }
+
+    fn execute(self, execute: bool) -> Self {
+        if execute {
+            Self(self.0 & !PA_NXE)
+        } else {
+            Self(self.0 | PA_NXE)
+        }
+    }
+
+    fn user(self, user: bool) -> Self {
+        if user {
+            Self(self.0 | PA_US)
+        } else {
+            Self(self.0 & !PA_US)
+        }
+    }
+
+    fn accessed(self, accessed: bool) -> Self {
+        if accessed {
+            Self(self.0 | PA_A)
+        } else {
+            Self(self.0 & !PA_A)
+        }
+    }
+
+    fn dirty(self, dirty: bool) -> Self {
+        if dirty {
+            Self(self.0 | PA_D)
+        } else {
+            Self(self.0 & !PA_D)
+        }
+    }
+
+    fn global(self, global: bool) -> Self {
+        if global {
+            Self(self.0 | PA_G)
+        } else {
+            Self(self.0 & !PA_G)
+        }
+    }
+
+    fn copy_on_write(self, cow: bool) -> Self {
+        if cow {
+            Self(self.0 | PA_COW)
+        } else {
+            Self(self.0 & !PA_COW)
+        }
+    }
+
+    fn mapped(self, mmap: bool) -> Self {
+        if mmap {
+            Self(self.0 | PA_MMAP)
+        } else {
+            Self(self.0 & !PA_MMAP)
+        }
+    }
+
+    fn anonymous(self, anon: bool) -> Self {
+        if anon {
+            Self(self.0 | PA_ANON)
+        } else {
+            Self(self.0 & !PA_ANON)
+        }
+    }
+
+    fn is_present(&self) -> bool {
+        self.0 & PA_P != 0
+    }
+
+    fn is_write(&self) -> bool {
+        self.0 & PA_RW != 0
+    }
+
+    fn is_execute(&self) -> bool {
+        self.0 & PA_NXE == 0
+    }
+
+    fn is_user(&self) -> bool {
+        self.0 & PA_US != 0
+    }
+
+    fn is_accessed(&self) -> bool {
+        self.0 & PA_A != 0
+    }
+
+    fn is_dirty(&self) -> bool {
+        self.0 & PA_D != 0
+    }
+
+    fn is_global(&self) -> bool {
+        self.0 & PA_G != 0
+    }
+
+    fn is_copy_on_write(&self) -> bool {
+        self.0 & PA_COW != 0
+    }
+
+    fn is_mapped(&self) -> bool {
+        self.0 & PA_MMAP != 0
+    }
+
+    fn is_anonymous(&self) -> bool {
+        self.0 & PA_ANON != 0
+    }
+}
+
+pub type DefaultPagingMode = PagingMode4Levels;
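
A short sketch (not part of the commit) showing how a `PTE64` is filled in through the generic `PTE`/`PageAttribute` traits; the frame number is supplied by the caller:

    use arch::{PageAttribute64, PTE64};
    use eonix_mm::{
        page_table::{PageAttribute as _, PTE as _},
        paging::PFN,
    };

    fn map_user_rw(pte: &mut PTE64, pfn: PFN) {
        // Present, writable, user-accessible and non-executable: `new()` starts
        // with PA_NXE set and `.execute(true)` is never called.
        let attr = PageAttribute64::new().present(true).write(true).user(true);
        pte.set(pfn, attr);
        debug_assert!(pte.get_attr().is_user() && !pte.get_attr().is_execute());
    }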

+ 16 - 10
arch/src/x86_64/mod.rs

@@ -1,11 +1,16 @@
 mod context;
+mod fence;
 mod gdt;
 mod init;
 mod interrupt;
 mod io;
+mod mm;
+mod percpu;
 mod user;
 
-pub(self) mod percpu;
+use core::arch::asm;
+use eonix_mm::address::{Addr as _, PAddr, VAddr};
+use eonix_mm::paging::PFN;
 
 pub use self::context::*;
 pub use self::gdt::*;
@@ -13,9 +18,10 @@ pub use self::init::*;
 pub use self::interrupt::*;
 pub use self::io::*;
 pub use self::user::*;
-pub use percpu_macros::define_percpu;
-
-use core::arch::asm;
+pub use fence::*;
+pub use mm::*;
+pub use percpu::*;
+pub use percpu_macros::{define_percpu, define_percpu_shared};
 
 #[inline(always)]
 pub fn flush_tlb(vaddr: usize) {
@@ -41,7 +47,7 @@ pub fn flush_tlb_all() {
 }
 
 #[inline(always)]
-pub fn get_root_page_table() -> usize {
+pub fn get_root_page_table_pfn() -> PFN {
     let cr3: usize;
     unsafe {
         asm!(
@@ -50,22 +56,22 @@ pub fn get_root_page_table() -> usize {
             options(att_syntax)
         );
     }
-    cr3
+    PFN::from(PAddr::from(cr3))
 }
 
 #[inline(always)]
-pub fn set_root_page_table(pfn: usize) {
+pub fn set_root_page_table_pfn(pfn: PFN) {
     unsafe {
         asm!(
             "mov {0}, %cr3",
-            in(reg) pfn,
+            in(reg) PAddr::from(pfn).addr(),
             options(att_syntax)
         );
     }
 }
 
 #[inline(always)]
-pub fn get_page_fault_address() -> usize {
+pub fn get_page_fault_address() -> VAddr {
     let cr2: usize;
     unsafe {
         asm!(
@@ -74,7 +80,7 @@ pub fn get_page_fault_address() -> usize {
             options(att_syntax)
         );
     }
-    cr2
+    VAddr::from(cr2)
 }
 
 #[inline(always)]

+ 80 - 10
arch/src/x86_64/percpu.rs

@@ -1,16 +1,86 @@
 use super::wrmsr;
-use core::{arch::asm, ptr::NonNull};
+use crate::x86_64::mm::PAGE_SIZE;
+use core::{
+    alloc::Layout,
+    arch::asm,
+    cell::UnsafeCell,
+    ptr::{null_mut, NonNull},
+    sync::atomic::{AtomicPtr, Ordering},
+};
 
-fn save_percpu_pointer(percpu_area_base: NonNull<u8>) {
-    wrmsr(0xC0000101, percpu_area_base.as_ptr() as u64);
+pub const MAX_CPUS: usize = 256;
+
+#[repr(align(4096))]
+struct PercpuData(UnsafeCell<()>); // Not `Sync`.
+
+pub struct PercpuArea {
+    data: NonNull<PercpuData>,
 }
 
-pub unsafe fn init_percpu_area_thiscpu(percpu_area_base: NonNull<u8>) {
-    save_percpu_pointer(percpu_area_base);
+static PERCPU_POINTERS: [AtomicPtr<PercpuData>; MAX_CPUS] =
+    [const { AtomicPtr::new(null_mut()) }; MAX_CPUS];
+
+impl PercpuArea {
+    fn page_count() -> usize {
+        extern "C" {
+            static PERCPU_PAGES: usize;
+        }
+        // SAFETY: `PERCPU_PAGES` is defined in the linker script and never changes.
+        let page_count = unsafe { PERCPU_PAGES };
+        assert_ne!(page_count, 0);
+        page_count
+    }
+
+    fn data_start() -> NonNull<u8> {
+        extern "C" {
+            fn _PERCPU_DATA_START();
+        }
+
+        NonNull::new(_PERCPU_DATA_START as usize as *mut _)
+            .expect("Percpu data should not be null.")
+    }
+
+    fn layout() -> Layout {
+        Layout::from_size_align(Self::page_count() * PAGE_SIZE, PAGE_SIZE).expect("Invalid layout.")
+    }
+
+    pub fn new<F>(allocate: F) -> Self
+    where
+        F: FnOnce(Layout) -> NonNull<u8>,
+    {
+        let data_pointer = allocate(Self::layout());
+
+        unsafe {
+            // SAFETY: The `data_pointer` is of valid length and properly aligned.
+            data_pointer
+                .copy_from_nonoverlapping(Self::data_start(), Self::page_count() * PAGE_SIZE);
+        }
+
+        Self {
+            data: data_pointer.cast(),
+        }
+    }
+
+    /// Set up the percpu area for the current CPU.
+    pub fn setup(&self) {
+        wrmsr(0xC0000101, self.data.as_ptr() as u64);
+
+        unsafe {
+            // SAFETY: %gs:0 points to the start of the percpu area.
+            asm!(
+                "movq {}, %gs:0",
+                in(reg) self.data.as_ptr(),
+                options(nostack, preserves_flags, att_syntax)
+            );
+        }
+    }
+
+    pub fn register(self: Self, cpuid: usize) {
+        PERCPU_POINTERS[cpuid].store(self.data.as_ptr(), Ordering::Release);
+    }
 
-    asm!(
-        "movq {}, %gs:0",
-        in(reg) percpu_area_base.as_ptr(),
-        options(att_syntax)
-    );
+    pub fn get_for(cpuid: usize) -> Option<NonNull<()>> {
+        let pointer = PERCPU_POINTERS[cpuid].load(Ordering::Acquire);
+        NonNull::new(pointer.cast())
+    }
 }
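
A hedged sketch (not part of the commit) of the boot-time sequence the reworked `PercpuArea` implies; the page-allocation closure is a stand-in for whatever early allocator the kernel uses:

    use core::{alloc::Layout, ptr::NonNull};

    fn bring_up_percpu(cpuid: usize, early_alloc: impl FnOnce(Layout) -> NonNull<u8>) {
        // Copy the `.percpu` template into a freshly allocated, page-aligned area...
        let area = arch::PercpuArea::new(early_alloc);
        // ...point IA32_GS_BASE and %gs:0 at it for the current CPU...
        area.setup();
        // ...and publish the pointer so other CPUs can reach it via `get_for(cpuid)`.
        area.register(cpuid);
    }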

+ 3 - 3
arch/src/x86_64/user.rs

@@ -1,6 +1,6 @@
 use core::pin::Pin;
 
-use super::{CPUStatus, GDTEntry};
+use super::{CPU, GDTEntry};
 
 #[derive(Debug, Clone)]
 pub enum UserTLS {
@@ -28,7 +28,7 @@ impl UserTLS {
         )
     }
 
-    pub fn load(&self, cpu_status: Pin<&mut CPUStatus>) {
+    pub fn load(&self, cpu_status: Pin<&mut CPU>) {
         match self {
             Self::TLS64(base) => {
                 const IA32_KERNEL_GS_BASE: u32 = 0xc0000102;
@@ -48,7 +48,7 @@ impl UserTLS {
     }
 }
 
-pub unsafe fn load_interrupt_stack(cpu_status: Pin<&mut CPUStatus>, stack: u64) {
+pub unsafe fn load_interrupt_stack(cpu_status: Pin<&mut CPU>, stack: u64) {
     // SAFETY: We don't move the CPUStatus object.
     cpu_status.get_unchecked_mut().set_rsp0(stack);
 }

+ 9 - 0
crates/buddy_allocator/Cargo.toml

@@ -0,0 +1,9 @@
+[package]
+name = "buddy_allocator"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+eonix_mm = { path = "../eonix_mm" }
+eonix_sync = { path = "../eonix_sync" }
+intrusive_list = { path = "../intrusive_list" }

+ 47 - 0
crates/buddy_allocator/src/free_area.rs

@@ -0,0 +1,47 @@
+use core::marker::{Send, Sync};
+use eonix_mm::paging::{PageFlags, RawPage, RawPagePtr};
+use intrusive_list::{container_of, Link};
+
+pub struct FreeArea {
+    free_list: Link,
+    count: usize,
+}
+
+unsafe impl Send for FreeArea {}
+unsafe impl Sync for FreeArea {}
+
+impl FreeArea {
+    pub const fn new() -> Self {
+        Self {
+            free_list: Link::new(),
+            count: 0,
+        }
+    }
+
+    pub fn get_free_pages(&mut self) -> Option<RawPagePtr> {
+        self.free_list.next_mut().map(|pages_link| {
+            assert_ne!(self.count, 0);
+
+            let pages_ptr = unsafe { container_of!(pages_link, RawPage, link) };
+            let pages_ptr = RawPagePtr::new(pages_ptr);
+
+            self.count -= 1;
+            pages_link.remove();
+
+            pages_ptr
+        })
+    }
+
+    pub fn add_pages(&mut self, pages_ptr: RawPagePtr) {
+        self.count += 1;
+        pages_ptr.as_mut().flags.set(PageFlags::FREE);
+        self.free_list.insert(&mut pages_ptr.as_mut().link)
+    }
+
+    pub fn del_pages(&mut self, pages_ptr: RawPagePtr) {
+        assert!(self.count >= 1 && pages_ptr.as_ref().flags.has(PageFlags::FREE));
+        self.count -= 1;
+        pages_ptr.as_mut().flags.clear(PageFlags::FREE);
+        pages_ptr.as_mut().link.remove();
+    }
+}

+ 73 - 0
crates/buddy_allocator/src/lib.rs

@@ -0,0 +1,73 @@
+#![no_std]
+
+mod free_area;
+mod zone;
+
+use core::sync::atomic::Ordering;
+use eonix_mm::{
+    address::PAddr,
+    paging::{PageAlloc, PageFlags, RawPagePtr, PFN},
+};
+use eonix_sync::Spin;
+use zone::Zone;
+
+pub use free_area::FreeArea;
+
+const MAX_ORDER: u32 = 10;
+const ZONE_AREAS: usize = const { MAX_ORDER as usize + 1 };
+
+static BUDDY_ALLOCATOR: BuddyAllocator = BuddyAllocator::new();
+
+pub struct BuddyAllocator {
+    zone: Spin<Zone<ZONE_AREAS>>,
+}
+
+impl BuddyAllocator {
+    const fn new() -> Self {
+        Self {
+            zone: Spin::new(Zone::new()),
+        }
+    }
+
+    pub fn create_pages(start: PAddr, end: PAddr) {
+        BUDDY_ALLOCATOR.zone.lock().create_pages(start, end);
+    }
+}
+
+impl PageAlloc for BuddyAllocator {
+    fn alloc_order(order: u32) -> Option<RawPagePtr> {
+        let pages_ptr = BUDDY_ALLOCATOR.zone.lock().get_free_pages(order);
+
+        if let Some(pages_ptr) = pages_ptr {
+            // SAFETY: The memory order here can be `Relaxed` for the same reason as
+            // in the copy constructor of `std::shared_ptr`.
+            pages_ptr.refcount().fetch_add(1, Ordering::Relaxed);
+            pages_ptr.flags().clear(PageFlags::FREE);
+        }
+
+        pages_ptr
+    }
+
+    unsafe fn dealloc(page_ptr: RawPagePtr) {
+        BUDDY_ALLOCATOR.zone.lock().free_pages(page_ptr);
+    }
+
+    unsafe fn has_management_over(page_ptr: RawPagePtr) -> bool {
+        !page_ptr.flags().has(PageFlags::FREE) && page_ptr.flags().has(PageFlags::BUDDY)
+    }
+}
+
+pub(self) trait BuddyPFNOps {
+    fn buddy_pfn(self, order: u32) -> PFN;
+    fn combined_pfn(self, buddy_pfn: PFN) -> PFN;
+}
+
+impl BuddyPFNOps for PFN {
+    fn buddy_pfn(self, order: u32) -> PFN {
+        PFN::from(usize::from(self) ^ (1 << order))
+    }
+
+    fn combined_pfn(self, buddy_pfn: PFN) -> PFN {
+        PFN::from(usize::from(self) & usize::from(buddy_pfn))
+    }
+}
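
The `BuddyPFNOps` helpers implement the classic buddy arithmetic; a worked example on concrete frame numbers (not part of the commit, and written as crate-internal code since the trait is `pub(self)`):

    // Crate-internal example, since `BuddyPFNOps` is `pub(self)`.
    fn buddy_arithmetic_example() {
        use eonix_mm::paging::PFN;

        // At order 3 (a block of 8 pages), the buddy of PFN 0x108 is PFN 0x100:
        // flipping bit `order` toggles between the two halves of the order-4 block.
        let pfn = PFN::from(0x108usize);
        let buddy = pfn.buddy_pfn(3);
        debug_assert_eq!(usize::from(buddy), 0x100);

        // Merging the pair yields the aligned start of the combined order-4 block.
        debug_assert_eq!(usize::from(pfn.combined_pfn(buddy)), 0x100);
    }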

+ 122 - 0
crates/buddy_allocator/src/zone.rs

@@ -0,0 +1,122 @@
+use crate::BuddyPFNOps as _;
+
+use super::free_area::FreeArea;
+use core::sync::atomic::Ordering;
+use eonix_mm::{
+    address::{AddrOps as _, PAddr},
+    paging::{PageFlags, RawPagePtr, PFN},
+};
+
+pub(super) struct Zone<const AREAS: usize> {
+    free_areas: [FreeArea; AREAS],
+}
+
+impl<const AREAS: usize> Zone<AREAS> {
+    pub const fn new() -> Self {
+        Self {
+            free_areas: [const { FreeArea::new() }; AREAS],
+        }
+    }
+
+    pub fn get_free_pages(&mut self, order: u32) -> Option<RawPagePtr> {
+        for current_order in order..AREAS as u32 {
+            let pages_ptr = self.free_areas[current_order as usize].get_free_pages();
+            let Some(pages_ptr) = pages_ptr else { continue };
+
+            pages_ptr.as_mut().order = order;
+
+            if current_order > order {
+                self.expand(pages_ptr, current_order, order);
+            }
+            assert!(pages_ptr.flags().has(PageFlags::PRESENT | PageFlags::FREE));
+
+            return Some(pages_ptr);
+        }
+        None
+    }
+
+    fn expand(&mut self, pages_ptr: RawPagePtr, order: u32, target_order: u32) {
+        let mut offset = 1 << order;
+
+        for order in (target_order..order).rev() {
+            offset >>= 1;
+            let split_pages_ptr = pages_ptr.offset(offset);
+            split_pages_ptr.as_mut().order = order;
+            split_pages_ptr.flags().set(PageFlags::BUDDY);
+            self.free_areas[order as usize].add_pages(split_pages_ptr);
+        }
+    }
+
+    pub fn free_pages(&mut self, mut pages_ptr: RawPagePtr) {
+        assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0);
+
+        let mut pfn = PFN::from(pages_ptr);
+        let mut current_order = pages_ptr.order();
+
+        while current_order < (AREAS - 1) as u32 {
+            let buddy_pfn = pfn.buddy_pfn(current_order);
+            let buddy_pages_ptr = RawPagePtr::from(buddy_pfn);
+
+            if !self.buddy_check(buddy_pages_ptr, current_order) {
+                break;
+            }
+
+            pages_ptr.flags().clear(PageFlags::BUDDY);
+            buddy_pages_ptr.flags().clear(PageFlags::BUDDY);
+            self.free_areas[current_order as usize].del_pages(buddy_pages_ptr);
+
+            pages_ptr = RawPagePtr::from(pfn.combined_pfn(buddy_pfn));
+            pfn = pfn.combined_pfn(buddy_pfn);
+
+            pages_ptr.flags().set(PageFlags::BUDDY);
+            current_order += 1;
+        }
+
+        pages_ptr.as_mut().order = current_order;
+        self.free_areas[current_order as usize].add_pages(pages_ptr);
+    }
+
+    /// Checks whether a page is free and is a buddy.
+    /// We can coalesce a page and its buddy if:
+    /// - the buddy is valid (present),
+    /// - the buddy is currently in `free_areas`,
+    /// - the page and its buddy have the same order, and
+    /// - the page and its buddy are in the same zone (check this once SMP is in place).
+    fn buddy_check(&self, pages_ptr: RawPagePtr, order: u32) -> bool {
+        if !pages_ptr.flags().has(PageFlags::PRESENT) {
+            return false;
+        }
+        if !pages_ptr.flags().has(PageFlags::FREE) {
+            return false;
+        }
+        if pages_ptr.flags().has(PageFlags::LOCAL) {
+            return false;
+        }
+        if pages_ptr.as_ref().order != order {
+            return false;
+        }
+
+        assert_eq!(pages_ptr.refcount().load(Ordering::Relaxed), 0);
+        true
+    }
+
+    /// Only used on buddy initialization
+    pub fn create_pages(&mut self, start: PAddr, end: PAddr) {
+        let mut start_pfn = PFN::from(start.ceil());
+        let end_pfn = PFN::from(end.floor());
+
+        while start_pfn < end_pfn {
+            let mut order = usize::from(start_pfn)
+                .trailing_zeros()
+                .min((AREAS - 1) as u32);
+
+            while start_pfn + order as usize > end_pfn {
+                order -= 1;
+            }
+            let page_ptr: RawPagePtr = start_pfn.into();
+            page_ptr.flags().set(PageFlags::BUDDY);
+            self.free_areas[order as usize].add_pages(page_ptr);
+            start_pfn = start_pfn + (1 << order) as usize;
+        }
+    }
+}

+ 0 - 1
crates/eonix_log/Cargo.toml

@@ -5,4 +5,3 @@ edition = "2024"
 
 [dependencies]
 eonix_sync = { path = "../eonix_sync" }
-lazy_static = { version = "1.5.0", features = ["spin_no_std"] }

+ 1 - 4
crates/eonix_log/src/lib.rs

@@ -4,7 +4,6 @@ use core::fmt::{self, Write};
 
 use alloc::sync::Arc;
 use eonix_sync::Spin;
-use lazy_static::lazy_static;
 
 extern crate alloc;
 
@@ -17,9 +16,7 @@ struct Console {
 }
 
 // TODO!!!: We should use a `RwLock` here for better performance.
-lazy_static! {
-    static ref CONSOLE: Spin<Console> = Spin::new(Console::new());
-}
+static CONSOLE: Spin<Console> = Spin::new(Console::new());
 
 impl Console {
     const fn new() -> Self {

+ 7 - 0
crates/eonix_mm/Cargo.toml

@@ -0,0 +1,7 @@
+[package]
+name = "eonix_mm"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+intrusive_list = { path = "../intrusive_list" }

+ 14 - 0
crates/eonix_mm/src/address.rs

@@ -0,0 +1,14 @@
+mod addr;
+mod addr_range;
+mod error;
+mod paddr;
+mod vaddr;
+
+pub use addr::{Addr, AddrOps};
+pub use addr_range::AddrRange;
+pub use error::AddressError;
+pub use paddr::PAddr;
+pub use vaddr::VAddr;
+
+pub type PRange = AddrRange<PAddr>;
+pub type VRange = AddrRange<VAddr>;

+ 64 - 0
crates/eonix_mm/src/address/addr.rs

@@ -0,0 +1,64 @@
+use crate::paging::PAGE_SIZE;
+use core::ops::{Add, Sub};
+
+pub trait Addr:
+    Sized
+    + Copy
+    + Clone
+    + Ord
+    + PartialOrd
+    + Eq
+    + PartialEq
+    + Sub<Output = usize>
+    + Sub<usize, Output = Self>
+    + Add<usize, Output = Self>
+    + From<usize>
+{
+    fn addr(self) -> usize;
+}
+
+pub trait AddrOps: Sized {
+    fn offset_in(self, size: usize) -> usize;
+
+    fn is_aligned_to(self, size: usize) -> bool;
+
+    /// Aligns the address to the nearest lower multiple of `size`.
+    fn floor_to(self, size: usize) -> Self;
+
+    /// Aligns the address to the nearest higher multiple of `size`.
+    fn ceil_to(self, size: usize) -> Self;
+
+    fn page_offset(self) -> usize {
+        self.offset_in(PAGE_SIZE)
+    }
+
+    fn is_page_aligned(self) -> bool {
+        self.is_aligned_to(PAGE_SIZE)
+    }
+
+    fn floor(self) -> Self {
+        self.floor_to(PAGE_SIZE)
+    }
+
+    fn ceil(self) -> Self {
+        self.ceil_to(PAGE_SIZE)
+    }
+}
+
+impl<A: Addr> AddrOps for A {
+    fn offset_in(self, size: usize) -> usize {
+        self.addr() % size
+    }
+
+    fn is_aligned_to(self, size: usize) -> bool {
+        self.offset_in(size) == 0
+    }
+
+    fn floor_to(self, size: usize) -> Self {
+        Self::from(self.addr() / size * size)
+    }
+
+    fn ceil_to(self, size: usize) -> Self {
+        Self::from(self.addr().div_ceil(size) * size)
+    }
+}
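
The blanket `AddrOps` impl gives every `Addr` type page-granular alignment helpers; a short check with arbitrary values (not part of the commit):

    use eonix_mm::address::{Addr as _, AddrOps as _, PAddr};

    fn alignment_example() {
        let addr = PAddr::from(0x1234usize);

        assert_eq!(addr.page_offset(), 0x234);   // offset within its 4 KiB page
        assert_eq!(addr.floor().addr(), 0x1000); // rounded down to a page boundary
        assert_eq!(addr.ceil().addr(), 0x2000);  // rounded up to a page boundary
        assert!(addr.ceil().is_page_aligned());
    }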

+ 190 - 0
crates/eonix_mm/src/address/addr_range.rs

@@ -0,0 +1,190 @@
+use super::addr::Addr;
+use core::{cmp::Ordering, fmt, ops::RangeBounds};
+
+#[derive(Clone, Copy)]
+/// A range of addresses.
+///
+/// The range is defined by two addresses, `start` and `end` and is inclusive
+/// on the start and exclusive on the end.
+///
+/// # Relations
+///
+/// ## Comparison
+///
+/// ### Equal
+/// Any two ranges that have one of them **containing** the other are considered equal.
+///
+/// ### Less
+/// If the two are not equal, the one that has the **smallest** start address is considered less.
+///
+/// ### Greater
+/// If the two are not equal, the one that has the **largest** end address is considered greater.
+///
+/// ## Overlapping Check
+/// Use `overlap_with` instead of `==` to check if two ranges overlap.
+pub struct AddrRange<A: Addr> {
+    start: A,
+    end: A,
+}
+
+impl<A: Addr> Eq for AddrRange<A> {}
+impl<A: Addr> PartialOrd for AddrRange<A> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl<A: Addr> PartialEq for AddrRange<A> {
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl<A: Addr> Ord for AddrRange<A> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.start == other.start {
+            return Ordering::Equal;
+        }
+
+        if self.end == other.end {
+            if self.start == self.end {
+                return Ordering::Greater;
+            }
+            if other.start == other.end {
+                return Ordering::Less;
+            }
+            return Ordering::Equal;
+        }
+
+        if self.start < other.start {
+            if other.end < self.end {
+                return Ordering::Equal;
+            } else {
+                return Ordering::Less;
+            }
+        }
+
+        if other.start < self.start {
+            if self.end < other.end {
+                return Ordering::Equal;
+            } else {
+                return Ordering::Greater;
+            }
+        }
+
+        unreachable!()
+    }
+}
+
+impl<A: Addr> From<A> for AddrRange<A> {
+    fn from(addr: A) -> Self {
+        Self {
+            start: addr,
+            end: addr,
+        }
+    }
+}
+
+impl<A: Addr> AddrRange<A> {
+    /// Creates a new `AddrRange` with the given start and end addresses.
+    ///
+    /// # Panics
+    /// Panics if the start address is greater than the end address.
+    ///
+    /// # Hint
+    /// Use `AddrRange::from(addr).grow(size)` to create a range of size `size`
+    /// starting from `addr`.
+    pub fn new(start: A, end: A) -> Self {
+        assert!(start <= end);
+        Self { start, end }
+    }
+
+    pub const fn start(&self) -> A {
+        self.start
+    }
+
+    pub const fn end(&self) -> A {
+        self.end
+    }
+
+    pub fn len(&self) -> usize {
+        self.end - self.start
+    }
+
+    pub fn shrink(&self, size: usize) -> Self {
+        assert!(size <= self.len());
+        Self::new(self.start, self.end - size)
+    }
+
+    pub fn grow(&self, count: usize) -> Self {
+        Self::new(self.start, self.end + count)
+    }
+
+    pub fn into_bounds(&self) -> impl RangeBounds<Self> {
+        if self.len() == 0 {
+            Self::from(self.start())..=Self::from(self.start())
+        } else {
+            Self::from(self.start())..=Self::from(self.end() - 1)
+        }
+    }
+
+    pub fn overlap_with(&self, other: &Self) -> bool {
+        self.start < other.end && self.end > other.start
+    }
+
+    pub fn split_at_checked(&self, at: A) -> (Option<Self>, Option<Self>) {
+        if self.end <= at {
+            (Some(*self), None)
+        } else if at <= self.start {
+            (None, Some(*self))
+        } else {
+            (
+                Some(Self::new(self.start, at)),
+                Some(Self::new(at, self.end)),
+            )
+        }
+    }
+
+    pub fn split_at(&self, at: A) -> (Self, Self) {
+        let (left, right) = self.split_at_checked(at);
+        (
+            left.expect("`at` is too large"),
+            right.expect("`at` is too small"),
+        )
+    }
+
+    pub fn mask_with_checked(&self, mask: &Self) -> Option<(Option<Self>, Self, Option<Self>)> {
+        if mask.len() == 0 || !self.overlap_with(mask) {
+            return None;
+        }
+
+        let left;
+        let mut mid;
+        let right;
+
+        if self.start < mask.start && mask.start < self.end {
+            let (l, r) = self.split_at(mask.start);
+            left = Some(l);
+            mid = r;
+        } else {
+            left = None;
+            mid = *self;
+        }
+
+        if mask.end < self.end {
+            let (l, r) = mid.split_at(mask.end);
+            mid = l;
+            right = Some(r);
+        } else {
+            right = None;
+        }
+
+        Some((left, mid, right))
+    }
+}
+
+impl<A: Addr + fmt::Debug> fmt::Debug for AddrRange<A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "[{:?}, {:?})", self.start, self.end)
+    }
+}
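
Because containment counts as equality, `==` on `AddrRange` answers "does one range contain the other" and `overlap_with` is the right call for partial overlap, as the doc comment above warns; a small sketch (not part of the commit):

    use eonix_mm::address::{VAddr, VRange};

    fn range_semantics_example() {
        let outer = VRange::new(VAddr::from(0x1000), VAddr::from(0x4000));
        let inner = VRange::new(VAddr::from(0x2000), VAddr::from(0x3000));
        let partial = VRange::new(VAddr::from(0x3000), VAddr::from(0x5000));

        // Containment compares as "equal", which is what map lookups rely on.
        assert!(outer == inner);

        // Partial overlap is *not* equality; `overlap_with` is the check for that.
        assert!(outer != partial);
        assert!(outer.overlap_with(&partial));
    }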

+ 4 - 0
crates/eonix_mm/src/address/error.rs

@@ -0,0 +1,4 @@
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum AddressError {
+    InvalidAddress,
+}

+ 65 - 0
crates/eonix_mm/src/address/paddr.rs

@@ -0,0 +1,65 @@
+use super::addr::Addr;
+use crate::paging::{PAGE_SIZE_BITS, PFN};
+use core::{
+    fmt,
+    ops::{Add, Sub},
+};
+
+#[repr(transparent)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct PAddr(usize);
+
+impl From<usize> for PAddr {
+    fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl Sub for PAddr {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for PAddr {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        PAddr(self.0 - rhs)
+    }
+}
+
+impl Add<usize> for PAddr {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        PAddr(self.0 + rhs)
+    }
+}
+
+impl fmt::Debug for PAddr {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "PAddr({:#x})", self.0)
+    }
+}
+
+impl Addr for PAddr {
+    fn addr(self) -> usize {
+        let Self(addr) = self;
+        addr
+    }
+}
+
+impl From<PFN> for PAddr {
+    fn from(value: PFN) -> Self {
+        Self(usize::from(value) << PAGE_SIZE_BITS)
+    }
+}
+
+impl PAddr {
+    pub const fn from_val(val: usize) -> Self {
+        Self(val)
+    }
+}

+ 60 - 0
crates/eonix_mm/src/address/vaddr.rs

@@ -0,0 +1,60 @@
+use super::addr::Addr;
+use core::{
+    fmt,
+    ops::{Add, Sub},
+};
+
+#[repr(transparent)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct VAddr(usize);
+
+impl From<usize> for VAddr {
+    fn from(v: usize) -> Self {
+        Self::from(v)
+    }
+}
+
+impl VAddr {
+    pub const NULL: Self = Self(0);
+
+    pub const fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl Sub for VAddr {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for VAddr {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        VAddr(self.0 - rhs)
+    }
+}
+
+impl Add<usize> for VAddr {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        VAddr(self.0 + rhs)
+    }
+}
+
+impl fmt::Debug for VAddr {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "VAddr({:#x})", self.0)
+    }
+}
+
+impl Addr for VAddr {
+    fn addr(self) -> usize {
+        let Self(addr) = self;
+        addr
+    }
+}

+ 5 - 0
crates/eonix_mm/src/lib.rs

@@ -0,0 +1,5 @@
+#![no_std]
+
+pub mod address;
+pub mod page_table;
+pub mod paging;

+ 9 - 0
crates/eonix_mm/src/page_table.rs

@@ -0,0 +1,9 @@
+mod page_table;
+mod paging_mode;
+mod pte;
+mod pte_iterator;
+
+pub use page_table::{PageTable, RawPageTable};
+pub use paging_mode::{PageTableLevel, PagingMode};
+pub use pte::{PageAttribute, PTE};
+pub use pte_iterator::PageTableIterator;

+ 132 - 0
crates/eonix_mm/src/page_table/page_table.rs

@@ -0,0 +1,132 @@
+use super::{
+    paging_mode::PageTableLevel,
+    pte_iterator::{KernelIterator, UserIterator},
+    PageAttribute, PagingMode, PTE,
+};
+use crate::{
+    address::{PAddr, VRange},
+    page_table::PageTableIterator,
+    paging::{Page, PageAccess, PageAlloc, PageBlock},
+};
+use core::{marker::PhantomData, ptr::NonNull};
+
+pub trait RawPageTable<'a>: 'a {
+    type Entry: PTE + 'a;
+
+    /// Return the entry at the given index.
+    fn index(&self, index: u16) -> &'a Self::Entry;
+
+    /// Return a mutable reference to the entry at the given index.
+    fn index_mut(&mut self, index: u16) -> &'a mut Self::Entry;
+
+    /// Get the page table pointed to by raw pointer `ptr`.
+    unsafe fn from_ptr(ptr: NonNull<PageBlock>) -> Self;
+}
+
+pub struct PageTable<'a, M, A, X>
+where
+    M: PagingMode,
+    M::Entry: 'a,
+    A: PageAlloc,
+    X: PageAccess,
+{
+    root_table_page: Page<A>,
+    phantom: PhantomData<&'a (M, X)>,
+}
+
+impl<'a, M, A, X> PageTable<'a, M, A, X>
+where
+    M: PagingMode,
+    M::Entry: 'a,
+    A: PageAlloc,
+    X: PageAccess,
+{
+    pub fn new<A1: PageAlloc>(kernel_root_table_page: &Page<A1>) -> Self {
+        let new_root_table_page = Page::<A>::alloc();
+        let new_table_data = X::get_ptr_for_page(&new_root_table_page);
+        let kernel_table_data = X::get_ptr_for_page(kernel_root_table_page);
+
+        unsafe {
+            // SAFETY: `new_table_data` and `kernel_table_data` are both valid pointers
+            //         to **different** page tables.
+            new_table_data.copy_from_nonoverlapping(kernel_table_data, 1);
+        }
+
+        let mut root_page_table = unsafe {
+            // SAFETY: `page_table_ptr` is a valid pointer to a page table.
+            M::RawTable::from_ptr(new_table_data)
+        };
+
+        let level0 = M::LEVELS[0];
+        for idx in 0..level0.max_index() / 2 {
+            // We consider the first half of the page table as user space.
+            // Clear all (potential) user space mappings.
+            root_page_table.index_mut(idx).take();
+        }
+
+        Self {
+            root_table_page: new_root_table_page,
+            phantom: PhantomData,
+        }
+    }
+
+    pub fn addr(&self) -> PAddr {
+        self.root_table_page.start()
+    }
+
+    pub fn iter_user(&self, range: VRange) -> impl Iterator<Item = &mut M::Entry> {
+        let page_table_ptr = X::get_ptr_for_page(&self.root_table_page);
+        let root_page_table = unsafe {
+            // SAFETY: `page_table_ptr` is a valid pointer to a page table.
+            M::RawTable::from_ptr(page_table_ptr)
+        };
+
+        PageTableIterator::<M, A, X, UserIterator>::new(root_page_table, range)
+    }
+
+    pub fn iter_kernel(&self, range: VRange) -> impl Iterator<Item = &mut M::Entry> {
+        let page_table_ptr = X::get_ptr_for_page(&self.root_table_page);
+        let root_page_table = unsafe {
+            // SAFETY: `page_table_ptr` is a valid pointer to a page table.
+            M::RawTable::from_ptr(page_table_ptr)
+        };
+
+        PageTableIterator::<M, A, X, KernelIterator>::new(root_page_table, range)
+    }
+
+    fn drop_page_table_recursive(page_table: &Page<A>, levels: &[PageTableLevel]) {
+        let [level, remaining_levels @ ..] = levels else { return };
+
+        let page_table_ptr = X::get_ptr_for_page(page_table);
+        let mut page_table = unsafe {
+            // SAFETY: `page_table_ptr` is a valid pointer to a page table.
+            M::RawTable::from_ptr(page_table_ptr)
+        };
+
+        for pte in (0..=level.max_index()).map(|i| page_table.index_mut(i)) {
+            let (pfn, attr) = pte.take();
+            if !attr.is_present() || !attr.is_user() {
+                continue;
+            }
+
+            let page_table = unsafe {
+                // SAFETY: We got the pfn from a valid page table entry, so it should be valid.
+                Page::<A>::from_raw(pfn)
+            };
+
+            Self::drop_page_table_recursive(&page_table, remaining_levels);
+        }
+    }
+}
+
+impl<'a, M, A, X> Drop for PageTable<'a, M, A, X>
+where
+    M: PagingMode,
+    M::Entry: 'a,
+    A: PageAlloc,
+    X: PageAccess,
+{
+    fn drop(&mut self) {
+        Self::drop_page_table_recursive(&self.root_table_page, M::LEVELS);
+    }
+}
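
A hedged sketch (not part of the commit) of how the generic `PageTable` is meant to be driven, written against the traits only; the concrete paging mode, allocator and accessor are supplied elsewhere in the kernel:

    use eonix_mm::{
        address::VRange,
        page_table::{PageAttribute as _, PageTable, PagingMode, PTE},
        paging::{Page, PageAccess, PageAlloc},
    };

    // Back every page in `range` with a fresh frame and user RW permissions.
    fn map_anonymous<'a, M, A, X>(page_table: &PageTable<'a, M, A, X>, range: VRange)
    where
        M: PagingMode,
        M::Entry: 'a,
        A: PageAlloc,
        X: PageAccess,
    {
        for pte in page_table.iter_user(range) {
            let page = Page::<A>::alloc();
            let attr = <M::Entry as PTE>::Attr::new()
                .present(true)
                .write(true)
                .user(true)
                .anonymous(true);
            // `into_raw` hands ownership of the frame over to the page table entry.
            pte.set(page.into_raw(), attr);
        }
    }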

+ 38 - 0
crates/eonix_mm/src/page_table/paging_mode.rs

@@ -0,0 +1,38 @@
+use super::{RawPageTable, PTE};
+use crate::{
+    address::{Addr as _, VAddr},
+    paging::PFN,
+};
+
+pub trait PagingMode {
+    type Entry: PTE;
+    type RawTable<'a>: RawPageTable<'a, Entry = Self::Entry>;
+
+    const LEVELS: &'static [PageTableLevel];
+    const KERNEL_ROOT_TABLE_PFN: PFN;
+}
+
+#[derive(Clone, Copy, PartialOrd, PartialEq)]
+pub struct PageTableLevel(usize, usize);
+
+impl PageTableLevel {
+    pub const fn new(nth_bit: usize, len: usize) -> Self {
+        Self(nth_bit, len)
+    }
+
+    pub const fn nth_bit(self) -> usize {
+        self.0
+    }
+
+    pub const fn len(self) -> usize {
+        self.1
+    }
+
+    pub const fn max_index(self) -> u16 {
+        (1 << self.len()) - 1
+    }
+
+    pub fn index_of(self, vaddr: VAddr) -> u16 {
+        ((vaddr.addr() >> self.nth_bit()) & ((1 << self.len()) - 1)) as u16
+    }
+}
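
With the four x86_64 levels declared in `arch/src/x86_64/mm.rs` above (shifts 39/30/21/12, 9 bits each), `index_of` splits a virtual address into per-level table indices; a small check (not part of the commit):

    use eonix_mm::{address::VAddr, page_table::PageTableLevel};

    fn index_of_example() {
        let levels = [
            PageTableLevel::new(39, 9),
            PageTableLevel::new(30, 9),
            PageTableLevel::new(21, 9),
            PageTableLevel::new(12, 9),
        ];

        // 0x20_1000 = 2 MiB + 4 KiB: entry 1 in the level-2 (2 MiB) table and
        // entry 1 in the leaf (4 KiB) table; the top two levels index entry 0.
        let vaddr = VAddr::from(0x20_1000);
        let indices: [u16; 4] = core::array::from_fn(|i| levels[i].index_of(vaddr));
        assert_eq!(indices, [0, 0, 1, 1]);
    }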

+ 52 - 0
crates/eonix_mm/src/page_table/pte.rs

@@ -0,0 +1,52 @@
+use crate::paging::PFN;
+
+pub trait PageAttribute: Copy {
+    /// Create a new instance of the attribute with all attributes set to false.
+    fn new() -> Self;
+
+    fn present(self, present: bool) -> Self;
+    fn write(self, write: bool) -> Self;
+    fn execute(self, execute: bool) -> Self;
+    fn user(self, user: bool) -> Self;
+    fn accessed(self, accessed: bool) -> Self;
+    fn dirty(self, dirty: bool) -> Self;
+    fn global(self, global: bool) -> Self;
+    fn copy_on_write(self, cow: bool) -> Self;
+    fn mapped(self, mmap: bool) -> Self;
+    fn anonymous(self, anon: bool) -> Self;
+
+    fn is_present(&self) -> bool;
+    fn is_write(&self) -> bool;
+    fn is_execute(&self) -> bool;
+    fn is_user(&self) -> bool;
+    fn is_accessed(&self) -> bool;
+    fn is_dirty(&self) -> bool;
+    fn is_global(&self) -> bool;
+    fn is_copy_on_write(&self) -> bool;
+    fn is_mapped(&self) -> bool;
+    fn is_anonymous(&self) -> bool;
+}
+
+pub trait PTE: Sized {
+    type Attr: PageAttribute;
+
+    fn set(&mut self, pfn: PFN, attr: Self::Attr);
+    fn get(&self) -> (PFN, Self::Attr);
+    fn take(&mut self) -> (PFN, Self::Attr);
+
+    fn set_pfn(&mut self, pfn: PFN) {
+        self.set(pfn, self.get_attr());
+    }
+
+    fn set_attr(&mut self, attr: Self::Attr) {
+        self.set(self.get_pfn(), attr);
+    }
+
+    fn get_pfn(&self) -> PFN {
+        self.get().0
+    }
+
+    fn get_attr(&self) -> Self::Attr {
+        self.get().1
+    }
+}

+ 177 - 0
crates/eonix_mm/src/page_table/pte_iterator.rs

@@ -0,0 +1,177 @@
+use super::{PageAttribute as _, PagingMode, RawPageTable as _, PTE};
+use crate::{
+    address::{AddrOps as _, VRange},
+    paging::{Page, PageAccess, PageAlloc, PAGE_SIZE},
+};
+use core::marker::PhantomData;
+
+pub struct KernelIterator;
+pub struct UserIterator;
+
+pub trait IteratorType<M: PagingMode> {
+    fn page_table_attributes() -> <M::Entry as PTE>::Attr;
+
+    fn get_page_table<'a, A, X>(pte: &mut M::Entry) -> M::RawTable<'a>
+    where
+        A: PageAlloc,
+        X: PageAccess,
+    {
+        let attr = pte.get_attr();
+
+        if attr.is_present() {
+            let pfn = pte.get_pfn();
+            unsafe {
+                // SAFETY: We are creating a pointer to a page referenced to in
+                //         some page table, which should be valid.
+                let page_table_ptr = X::get_ptr_for_pfn(pfn);
+                // SAFETY: `page_table_ptr` is a valid pointer to a page table.
+                M::RawTable::from_ptr(page_table_ptr)
+            }
+        } else {
+            let page = Page::<A>::alloc();
+            let page_table_ptr = X::get_ptr_for_page(&page);
+
+            unsafe {
+                // SAFETY: `page_table_ptr` is good for writing and properly aligned.
+                page_table_ptr.write_bytes(0, 1);
+            }
+
+            pte.set(page.into_raw(), Self::page_table_attributes());
+
+            unsafe {
+                // SAFETY: `page_table_ptr` is a valid pointer to a page table.
+                M::RawTable::from_ptr(page_table_ptr)
+            }
+        }
+    }
+}
+
+pub struct PageTableIterator<'a, M, A, X, K>
+where
+    M: PagingMode,
+    M::Entry: 'a,
+    A: PageAlloc,
+    X: PageAccess,
+    K: IteratorType<M>,
+{
+    remaining: usize,
+
+    indicies: [u16; 8],
+    tables: [Option<M::RawTable<'a>>; 8],
+
+    _phantom: PhantomData<&'a (A, X, K)>,
+}
+
+impl<'a, M, A, X, K> PageTableIterator<'a, M, A, X, K>
+where
+    M: PagingMode,
+    M::Entry: 'a,
+    A: PageAlloc,
+    X: PageAccess,
+    K: IteratorType<M>,
+{
+    fn parse_tables_starting_from(&mut self, idx_level: usize) {
+        let levels_len = M::LEVELS.len();
+
+        for (idx, &pt_idx) in self
+            .indicies
+            .iter()
+            .enumerate()
+            .take(levels_len - 1)
+            .skip(idx_level)
+        {
+            let [parent_table, child_table] = unsafe {
+                // SAFETY: `idx` and `idx + 1` must not overlap.
+                //         `idx + 1` is always less than `levels_len` since we iterate
+                //         until `levels_len - 1`.
+                self.tables.get_disjoint_unchecked_mut([idx, idx + 1])
+            };
+            let parent_table = parent_table.as_mut().expect("Parent table is None");
+            let next_pte = parent_table.index_mut(pt_idx);
+            child_table.replace(K::get_page_table::<A, X>(next_pte));
+        }
+    }
+
+    pub fn new(page_table: M::RawTable<'a>, range: VRange) -> Self {
+        let start = range.start().floor();
+        let end = range.end().ceil();
+
+        let mut me = Self {
+            remaining: (end - start) / PAGE_SIZE,
+            indicies: [0; 8],
+            tables: [const { None }; 8],
+            _phantom: PhantomData,
+        };
+
+        for (i, level) in M::LEVELS.iter().enumerate() {
+            me.indicies[i] = level.index_of(start);
+        }
+
+        me.tables[0] = Some(page_table);
+        me.parse_tables_starting_from(0);
+
+        me
+    }
+}
+
+impl<'a, M, A, X, K> Iterator for PageTableIterator<'a, M, A, X, K>
+where
+    M: PagingMode,
+    M::Entry: 'a,
+    A: PageAlloc,
+    X: PageAccess,
+    K: IteratorType<M>,
+{
+    type Item = &'a mut M::Entry;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.remaining == 0 {
+            return None;
+        } else {
+            self.remaining -= 1;
+        }
+
+        let len_levels = M::LEVELS.len();
+
+        let retval = self.tables[len_levels - 1]
+            .as_mut()
+            .unwrap()
+            .index_mut(self.indicies[len_levels - 1]);
+
+        let idx_level_start_updating = M::LEVELS
+            .iter()
+            .zip(self.indicies.iter_mut())
+            .enumerate()
+            .rev()
+            .skip_while(|(_, (level, idx))| **idx == level.max_index())
+            .map(|(i, _)| i)
+            .next()
+            .expect("Index out of bounds");
+
+        self.indicies[idx_level_start_updating] += 1;
+        self.indicies[idx_level_start_updating + 1..len_levels].fill(0);
+        self.parse_tables_starting_from(idx_level_start_updating);
+
+        Some(retval)
+    }
+}
+
+impl<M: PagingMode> IteratorType<M> for KernelIterator {
+    fn page_table_attributes() -> <M::Entry as PTE>::Attr {
+        <M::Entry as PTE>::Attr::new()
+            .present(true)
+            .write(true)
+            .execute(true)
+            .global(true)
+    }
+}
+
+impl<M: PagingMode> IteratorType<M> for UserIterator {
+    fn page_table_attributes() -> <M::Entry as PTE>::Attr {
+        <M::Entry as PTE>::Attr::new()
+            .present(true)
+            .write(true)
+            .execute(true)
+            .user(true)
+    }
+}
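
The subtle part of `next()` is the carry across levels: the per-level indices behave like the digits of a multi-level counter, and once the lowest level reaches its maximum index the first non-saturated level above it is incremented and everything below it is reset, after which `parse_tables_starting_from` re-walks the affected tables. Below is a standalone model of just that carry step; the 4-level layout with 9-bit indices (maximum 511) is assumed purely for illustration.

    /// Advance a multi-level page-table index, carrying into higher levels
    /// the same way `PageTableIterator::next` does.
    fn advance(indices: &mut [u16; 4], max_index: u16) {
        let level = indices
            .iter()
            .enumerate()
            .rev()
            .skip_while(|&(_, &idx)| idx == max_index)
            .map(|(i, _)| i)
            .next()
            .expect("index out of bounds");
        indices[level] += 1;
        indices[level + 1..].fill(0);
    }

    fn main() {
        let mut indices = [0u16, 0, 1, 510];

        advance(&mut indices, 511);
        assert_eq!(indices, [0, 0, 1, 511]);

        // The lowest level is saturated, so the carry goes one level up.
        advance(&mut indices, 511);
        assert_eq!(indices, [0, 0, 2, 0]);
    }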

+ 9 - 0
crates/eonix_mm/src/paging.rs

@@ -0,0 +1,9 @@
+mod page;
+mod page_alloc;
+mod pfn;
+mod raw_page;
+
+pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS};
+pub use page_alloc::PageAlloc;
+pub use pfn::PFN;
+pub use raw_page::{PageFlags, RawPage, RawPagePtr};

+ 219 - 0
crates/eonix_mm/src/paging/page.rs

@@ -0,0 +1,219 @@
+use super::{raw_page::RawPagePtr, PageAlloc, PFN};
+use crate::address::{AddrRange, PAddr};
+use core::{fmt, marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, sync::atomic::Ordering};
+
+pub const PAGE_SIZE: usize = 4096;
+pub const PAGE_SIZE_BITS: u32 = PAGE_SIZE.trailing_zeros();
+
+/// A block of memory that is aligned to the page size and can be used for
+/// page-aligned allocations.
+///
+/// This is used to ensure that the memory is properly aligned to the page size.
+#[allow(dead_code)]
+#[repr(align(4096))]
+pub struct PageBlock([u8; PAGE_SIZE]);
+
+/// A trait that provides the kernel access to the page.
+pub trait PageAccess {
+    /// Returns a kernel-accessible pointer to the page referenced by the given
+    /// physical frame number.
+    ///
+    /// # Safety
+    /// This function is unsafe because calling it with a non-existent PFN causes
+    /// undefined behavior.
+    unsafe fn get_ptr_for_pfn(pfn: PFN) -> NonNull<PageBlock>;
+
+    /// Returns a kernel-accessible pointer to the given page.
+    fn get_ptr_for_page<A: PageAlloc>(page: &Page<A>) -> NonNull<PageBlock> {
+        unsafe {
+            // SAFETY: `page.pfn()` is guaranteed to be valid.
+            Self::get_ptr_for_pfn(page.pfn())
+        }
+    }
+}
+
+/// A Page allocated in allocator `A`.
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+pub struct Page<A: PageAlloc> {
+    raw_page: RawPagePtr,
+    _phantom: PhantomData<A>,
+}
+
+unsafe impl<A: PageAlloc> Send for Page<A> {}
+unsafe impl<A: PageAlloc> Sync for Page<A> {}
+
+impl<A: PageAlloc> Page<A> {
+    /// Allocate a page of the given *order*.
+    pub fn alloc_order(order: u32) -> Self {
+        Self {
+            raw_page: A::alloc_order(order).expect("Out of memory"),
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Allocate exactly one page.
+    pub fn alloc() -> Self {
+        Self {
+            raw_page: A::alloc().expect("Out of memory"),
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Allocate a contiguous block of pages that can contain at least `count` pages.
+    pub fn alloc_at_least(count: usize) -> Self {
+        Self {
+            raw_page: A::alloc_at_least(count).expect("Out of memory"),
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Whether we are the only owner of the page.
+    pub fn is_exclusive(&self) -> bool {
+        self.raw_page.refcount().load(Ordering::Acquire) == 1
+    }
+
+    /// Returns the *order* of the page, which is the log2 of the number of pages
+    /// contained in the page object.
+    pub fn order(&self) -> u32 {
+        self.raw_page.order()
+    }
+
+    /// Returns the total size of the page in bytes.
+    pub fn len(&self) -> usize {
+        1 << (self.order() + PAGE_SIZE_BITS)
+    }
+
+    /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the caller has ensured that
+    /// `pfn` points to a valid page allocated through `alloc_order()` and that the
+    /// page has not been freed or deallocated yet.
+    ///
+    /// No checks are done. Any violation of this assumption may lead to undefined behavior.
+    pub unsafe fn from_raw_unchecked(pfn: PFN) -> Self {
+        Self {
+            raw_page: RawPagePtr::from(pfn),
+            _phantom: PhantomData,
+        }
+    }
+
+    /// Acquire the ownership of the page pointed to by `pfn`, leaving `refcount` untouched.
+    ///
+    /// This function wraps `from_raw_unchecked()` with a basic check that the page
+    /// is valid and managed by the allocator.
+    ///
+    /// # Panics
+    /// This function will panic if the page is not valid or if the page is not managed by
+    /// the allocator.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the caller has ensured that
+    /// `pfn` points to an existing page (i.e., one inside the global page array) and the
+    /// page will not be freed or deallocated during the call.
+    pub unsafe fn from_raw(pfn: PFN) -> Self {
+        unsafe {
+            // SAFETY: The caller guarantees that the page is inside the global page array.
+            assert!(A::has_management_over(RawPagePtr::from(pfn)));
+
+            // SAFETY: We've checked the validity of the page, and the caller guarantees
+            //         that the page will not be freed or deallocated during the call.
+            Self::from_raw_unchecked(pfn)
+        }
+    }
+
+    /// Do some work with the page without touching the reference count, under the same
+    /// restrictions as `from_raw()`.
+    ///
+    /// # Safety
+    /// Check `from_raw()` for the safety requirements.
+    pub unsafe fn with_raw<F, O>(pfn: PFN, func: F) -> O
+    where
+        F: FnOnce(&Self) -> O,
+    {
+        unsafe {
+            let me = ManuallyDrop::new(Self::from_raw(pfn));
+            func(&me)
+        }
+    }
+
+    /// Do some work with the page without touching the reference count, under the same
+    /// restrictions as `from_raw_unchecked()`.
+    ///
+    /// # Safety
+    /// Check `from_raw_unchecked()` for the safety requirements.
+    pub unsafe fn with_raw_unchecked<F, O>(pfn: PFN, func: F) -> O
+    where
+        F: FnOnce(&Self) -> O,
+    {
+        unsafe {
+            let me = ManuallyDrop::new(Self::from_raw_unchecked(pfn));
+            func(&me)
+        }
+    }
+
+    /// Consumes the `Page` and returns the physical frame number without releasing
+    /// the reference that the `Page` holds.
+    pub fn into_raw(self) -> PFN {
+        let me = ManuallyDrop::new(self);
+        me.pfn()
+    }
+
+    /// Returns the physical frame number of the page, which is aligned with the
+    /// page size and valid.
+    pub fn pfn(&self) -> PFN {
+        PFN::from(self.raw_page)
+    }
+
+    /// Returns the start physical address of the page, which is guaranteed to be
+    /// aligned to the page size and valid.
+    pub fn start(&self) -> PAddr {
+        PAddr::from(self.pfn())
+    }
+
+    /// Returns the physical address range of the page, which is guaranteed to be
+    /// aligned to the page size and valid.
+    pub fn range(&self) -> AddrRange<PAddr> {
+        AddrRange::from(self.start()).grow(self.len())
+    }
+}
+
+impl<A: PageAlloc> Clone for Page<A> {
+    fn clone(&self) -> Self {
+        // SAFETY: The memory ordering here can be `Relaxed` for the same reason
+        // as in the copy constructor of `std::shared_ptr`.
+        self.raw_page.refcount().fetch_add(1, Ordering::Relaxed);
+
+        Self {
+            raw_page: self.raw_page,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<A: PageAlloc> Drop for Page<A> {
+    fn drop(&mut self) {
+        match self.raw_page.refcount().fetch_sub(1, Ordering::AcqRel) {
+            0 => panic!("Refcount for an in-use page is 0"),
+            1 => unsafe {
+                // SAFETY: `self.raw_page` points to a valid page inside the global page array.
+                assert!(A::has_management_over(self.raw_page));
+
+                // SAFETY: `self.raw_page` is managed by the allocator and we're dropping the page.
+                A::dealloc(self.raw_page)
+            },
+            _ => {}
+        }
+    }
+}
+
+impl<A: PageAlloc> fmt::Debug for Page<A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "Page({:?}, order={})",
+            usize::from(PFN::from(self.raw_page)),
+            self.order()
+        )
+    }
+}
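
A call-shape sketch of the intended `into_raw`/`from_raw_unchecked` round trip, e.g. when a page's reference is parked inside a page-table entry. `A: PageAlloc` and `X: PageAccess` are assumed to be supplied elsewhere in the kernel and the import path is inferred from the module layout above, so this illustrates usage rather than being a runnable program on its own.

    use eonix_mm::paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN};

    /// Allocate a page, zero it through its kernel mapping, and hand the
    /// reference it holds over to the caller as a bare PFN.
    fn install_zeroed_page<A: PageAlloc, X: PageAccess>() -> PFN {
        let page = Page::<A>::alloc();

        unsafe {
            // SAFETY: the pointer covers exactly the one page owned by `page`.
            X::get_ptr_for_page(&page).cast::<u8>().write_bytes(0, PAGE_SIZE);
        }

        // The reference is now tracked by whoever stores the returned PFN.
        page.into_raw()
    }

    /// Reclaim the reference previously leaked by `install_zeroed_page`.
    fn reclaim_page<A: PageAlloc>(pfn: PFN) {
        // SAFETY (assumed): `pfn` came from `install_zeroed_page` and has not
        // been freed in the meantime; dropping `_page` releases the reference.
        let _page = unsafe { Page::<A>::from_raw_unchecked(pfn) };
    }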

+ 31 - 0
crates/eonix_mm/src/paging/page_alloc.rs

@@ -0,0 +1,31 @@
+use super::raw_page::RawPagePtr;
+
+pub trait PageAlloc: Sized {
+    /// Allocate a page of the given *order*.
+    fn alloc_order(order: u32) -> Option<RawPagePtr>;
+
+    /// Allocate exactly one page.
+    fn alloc() -> Option<RawPagePtr> {
+        Self::alloc_order(0)
+    }
+
+    /// Allocate a contiguous block of pages that can contain at least `count` pages.
+    fn alloc_at_least(count: usize) -> Option<RawPagePtr> {
+        let order = count.next_power_of_two().trailing_zeros();
+        Self::alloc_order(order)
+    }
+
+    /// Deallocate a page.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the caller has ensured that
+    /// `page_ptr` is allocated in this allocator and never used after this call.
+    unsafe fn dealloc(page_ptr: RawPagePtr);
+
+    /// Check whether the page is allocated and managed by the allocator.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the caller has ensured that
+    /// `page_ptr` points to a raw page inside the global page array.
+    unsafe fn has_management_over(page_ptr: RawPagePtr) -> bool;
+}
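
The default `alloc_at_least` rounds the request up to the next power of two and allocates that order, so a request may be over-fulfilled by nearly a factor of two. A quick standalone check of the rounding:

    fn order_for(count: usize) -> u32 {
        count.next_power_of_two().trailing_zeros()
    }

    fn main() {
        assert_eq!(order_for(1), 0); // exactly one page
        assert_eq!(order_for(3), 2); // rounded up to 4 pages
        assert_eq!(order_for(5), 3); // rounded up to 8 pages
        assert_eq!(order_for(8), 3); // already a power of two
    }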

+ 65 - 0
crates/eonix_mm/src/paging/pfn.rs

@@ -0,0 +1,65 @@
+use crate::address::{Addr as _, PAddr};
+use core::{
+    fmt,
+    ops::{Add, Sub},
+};
+
+use super::PAGE_SIZE_BITS;
+
+#[repr(transparent)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct PFN(usize);
+
+impl From<PFN> for usize {
+    fn from(v: PFN) -> Self {
+        v.0
+    }
+}
+
+impl From<usize> for PFN {
+    fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl Sub for PFN {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for PFN {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        PFN(self.0 - rhs)
+    }
+}
+
+impl Add<usize> for PFN {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        PFN(self.0 + rhs)
+    }
+}
+
+impl fmt::Debug for PFN {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "PFN({:#x})", self.0)
+    }
+}
+
+impl From<PAddr> for PFN {
+    fn from(paddr: PAddr) -> Self {
+        Self(paddr.addr() >> PAGE_SIZE_BITS)
+    }
+}
+
+impl PFN {
+    pub const fn from_val(pfn: usize) -> Self {
+        Self(pfn)
+    }
+}
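
The `PAddr` conversion simply drops the low `PAGE_SIZE_BITS` bits, and the arithmetic impls treat a `PFN` as a plain frame index. A standalone illustration of the same arithmetic with the crate's 4 KiB page size (`PAGE_SIZE_BITS == 12`):

    const PAGE_SIZE_BITS: u32 = 12; // matches `PAGE_SIZE = 4096` above

    fn main() {
        let paddr: usize = 0x0010_3abc;
        let pfn = paddr >> PAGE_SIZE_BITS; // `PFN::from(PAddr)` performs this shift
        assert_eq!(pfn, 0x103);

        // `PFN + usize` and `PFN - PFN` behave like ordinary frame arithmetic.
        assert_eq!((pfn + 2) - pfn, 2);
    }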

+ 97 - 0
crates/eonix_mm/src/paging/raw_page.rs

@@ -0,0 +1,97 @@
+use super::PFN;
+use core::{
+    ptr::NonNull,
+    sync::atomic::{AtomicU32, AtomicUsize, Ordering},
+};
+use intrusive_list::Link;
+
+const PAGE_ARRAY: NonNull<RawPage> =
+    unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) };
+
+pub struct PageFlags(AtomicU32);
+
+pub struct RawPage {
+    /// This can be used for LRU page swap in the future.
+    ///
+    /// Now only used for free page links in the buddy system.
+    pub link: Link,
+    /// # Safety
+    /// This field is only used in the buddy system and is protected by the global lock.
+    pub order: u32,
+    pub flags: PageFlags,
+    pub refcount: AtomicUsize,
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct RawPagePtr(NonNull<RawPage>);
+
+impl PageFlags {
+    pub const PRESENT: u32 = 1 << 0;
+    // pub const LOCKED: u32 = 1 << 1;
+    pub const BUDDY: u32 = 1 << 2;
+    // pub const SLAB: u32 = 1 << 3;
+    // pub const DIRTY: u32 = 1 << 4;
+    pub const FREE: u32 = 1 << 5;
+    pub const LOCAL: u32 = 1 << 6;
+
+    pub fn has(&self, flag: u32) -> bool {
+        (self.0.load(Ordering::Relaxed) & flag) == flag
+    }
+
+    pub fn set(&self, flag: u32) {
+        self.0.fetch_or(flag, Ordering::Relaxed);
+    }
+
+    pub fn clear(&self, flag: u32) {
+        self.0.fetch_and(!flag, Ordering::Relaxed);
+    }
+}
+
+impl RawPagePtr {
+    pub const fn new(ptr: NonNull<RawPage>) -> Self {
+        Self(ptr)
+    }
+
+    pub const fn as_ptr(self) -> *mut RawPage {
+        self.0.as_ptr()
+    }
+
+    pub const fn as_ref<'a>(self) -> &'a RawPage {
+        unsafe { &*self.as_ptr() }
+    }
+
+    pub const fn as_mut<'a>(self) -> &'a mut RawPage {
+        unsafe { &mut *self.as_ptr() }
+    }
+
+    pub const fn order(&self) -> u32 {
+        self.as_ref().order
+    }
+
+    pub const fn flags(&self) -> &PageFlags {
+        &self.as_ref().flags
+    }
+
+    pub const fn refcount(&self) -> &AtomicUsize {
+        &self.as_ref().refcount
+    }
+
+    pub const fn offset(&self, count: usize) -> Self {
+        let new_raw_ptr = unsafe { self.0.add(count) };
+        Self::new(new_raw_ptr)
+    }
+}
+
+impl From<RawPagePtr> for PFN {
+    fn from(value: RawPagePtr) -> Self {
+        let idx = unsafe { value.as_ptr().offset_from(PAGE_ARRAY.as_ptr()) as usize };
+        Self::from(idx)
+    }
+}
+
+impl From<PFN> for RawPagePtr {
+    fn from(pfn: PFN) -> Self {
+        let raw_page_ptr = unsafe { PAGE_ARRAY.add(usize::from(pfn)) };
+        Self::new(raw_page_ptr)
+    }
+}
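
Both conversions above are plain pointer arithmetic against the fixed `PAGE_ARRAY` base: the PFN is simply the index of the page's `RawPage` descriptor in that array. A standalone model of the same relationship, using a local array in place of the kernel's fixed virtual address:

    #[allow(dead_code)]
    struct RawPage {
        order: u32,
        refcount: usize,
    }

    fn main() {
        // Stand-in for `PAGE_ARRAY`: three page descriptors, one per frame.
        let pages = [
            RawPage { order: 0, refcount: 0 },
            RawPage { order: 0, refcount: 0 },
            RawPage { order: 0, refcount: 0 },
        ];
        let base = pages.as_ptr();

        // PFN -> pointer: `PAGE_ARRAY.add(pfn)`.
        let ptr = unsafe { base.add(2) };

        // Pointer -> PFN: `ptr.offset_from(PAGE_ARRAY)`.
        let pfn = unsafe { ptr.offset_from(base) } as usize;
        assert_eq!(pfn, 2);
    }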

+ 12 - 0
crates/eonix_percpu/Cargo.toml

@@ -0,0 +1,12 @@
+[package]
+name = "eonix_percpu"
+version = "0.1.0"
+edition = "2024"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "2.0", features = ["full"] }

+ 24 - 0
crates/eonix_percpu/src/arch.rs

@@ -0,0 +1,24 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+use syn::{Ident, Type};
+
+/// Generate code that computes the address of the given percpu variable for the current CPU.
+pub fn get_percpu_pointer(percpu: &Ident, ty: &Type) -> TokenStream {
+    quote! {
+        {
+            #[cfg(target_arch = "x86_64")]
+            {
+                let base: *mut #ty;
+                ::core::arch::asm!(
+                    "mov %gs:0, {address}",
+                    "add ${percpu_pointer}, {address}",
+                    percpu_pointer = sym #percpu,
+                    address = out(reg) base,
+                    options(att_syntax)
+                );
+                base
+            }
+        }
+    }
+    .into()
+}

+ 181 - 0
crates/eonix_percpu/src/lib.rs

@@ -0,0 +1,181 @@
+extern crate proc_macro;
+
+use proc_macro::TokenStream;
+use quote::{format_ident, quote};
+use syn::{parse_macro_input, ItemStatic};
+
+mod arch;
+
+#[proc_macro_attribute]
+pub fn define_percpu(attrs: TokenStream, item: TokenStream) -> TokenStream {
+    if !attrs.is_empty() {
+        panic!("`define_percpu` attribute does not take any arguments");
+    }
+
+    let item = parse_macro_input!(item as ItemStatic);
+    let vis = &item.vis;
+    let ident = &item.ident;
+    let ty = &item.ty;
+    let expr = &item.expr;
+
+    let is_bool = quote!(#ty).to_string().as_str() == "bool";
+    let is_integer =
+        ["u8", "u16", "u32", "u64", "usize"].contains(&quote!(#ty).to_string().as_str());
+
+    let is_atomic_like = is_bool || is_integer || quote!(#ty).to_string().contains("NonNull");
+
+    let inner_ident = format_ident!("_percpu_inner_{}", ident);
+    let access_ident = format_ident!("_access_{}", ident);
+
+    let integer_methods = if is_integer {
+        quote! {
+            pub fn add(&self, value: #ty) {
+                *unsafe { self.as_mut() } += value;
+            }
+
+            pub fn sub(&self, value: #ty) {
+                *unsafe { self.as_mut() } -= value;
+            }
+        }
+    } else {
+        quote! {}
+    };
+
+    let preempt_disable = if !is_atomic_like {
+        quote! { eonix_preempt::disable(); }
+    } else {
+        quote! {}
+    };
+
+    let preempt_enable = if !is_atomic_like {
+        quote! { eonix_preempt::enable(); }
+    } else {
+        quote! {}
+    };
+
+    let as_ptr = arch::get_percpu_pointer(&inner_ident, &ty);
+
+    quote! {
+        #[link_section = ".percpu"]
+        #[allow(non_upper_case_globals)]
+        static mut #inner_ident: #ty = #expr;
+        #[allow(non_camel_case_types)]
+        #vis struct #access_ident;
+        #vis static #ident: #access_ident = #access_ident;
+
+        impl #access_ident {
+            /// # Safety
+            /// This function is unsafe because it allows for mutable aliasing of the percpu
+            /// variable.
+            /// Make sure that preempt is disabled when calling this function.
+            pub unsafe fn as_ptr(&self) -> *mut #ty {
+                #as_ptr
+            }
+
+            pub fn get(&self) -> #ty {
+                #preempt_disable
+                let value = unsafe { self.as_ptr().read() };
+                #preempt_enable
+                value
+            }
+
+            pub fn set(&self, value: #ty) {
+                #preempt_disable
+                unsafe { self.as_ptr().write(value) }
+                #preempt_enable
+            }
+
+            pub fn swap(&self, mut value: #ty) -> #ty {
+                #preempt_disable
+                unsafe { self.as_ptr().swap(&mut value) }
+                #preempt_enable
+                value
+            }
+
+            /// # Safety
+            /// This function is unsafe because the returned reference may alias mutable
+            /// accesses to the percpu variable.
+            /// Make sure that preempt is disabled when calling this function.
+            pub unsafe fn as_ref(&self) -> & #ty {
+                // SAFETY: This is safe because `as_ptr()` is guaranteed to be valid.
+                self.as_ptr().as_ref().unwrap()
+            }
+
+            /// # Safety
+            /// This function is unsafe because it allows for mutable aliasing of the percpu
+            /// variable.
+            /// Make sure that preempt is disabled when calling this function.
+            pub unsafe fn as_mut(&self) -> &mut #ty {
+                // SAFETY: This is safe because `as_ptr()` is guaranteed to be valid.
+                self.as_ptr().as_mut().unwrap()
+            }
+
+            #integer_methods
+        }
+    }
+    .into()
+}
+
+#[proc_macro_attribute]
+pub fn define_percpu_shared(attrs: TokenStream, item: TokenStream) -> TokenStream {
+    if !attrs.is_empty() {
+        panic!("`define_percpu_shared` attribute does not take any arguments");
+    }
+
+    let item = parse_macro_input!(item as ItemStatic);
+    let vis = &item.vis;
+    let ident = &item.ident;
+    let ty = &item.ty;
+    let expr = &item.expr;
+
+    let inner_ident = format_ident!("_percpu_shared_inner_{}", ident);
+    let access_ident = format_ident!("_access_shared_{}", ident);
+
+    let as_ptr = arch::get_percpu_pointer(&inner_ident, &ty);
+
+    quote! {
+        #[link_section = ".percpu"]
+        #[allow(non_upper_case_globals)]
+        static #inner_ident: #ty = #expr;
+        #[allow(non_camel_case_types)]
+        #vis struct #access_ident;
+        #vis static #ident: #access_ident = #access_ident;
+
+        impl #access_ident {
+            fn as_ptr(&self) -> *const #ty {
+                unsafe { ( #as_ptr ) }
+            }
+
+            pub fn get_ref(&self) -> & #ty {
+                // SAFETY: This is safe because `as_ptr()` is guaranteed to be valid.
+                unsafe { self.as_ptr().as_ref().unwrap() }
+            }
+
+            pub fn get_for_cpu(&self, cpuid: usize) -> Option<& #ty > {
+                let offset = & #inner_ident as *const _ as usize;
+                let base = ::arch::PercpuArea::get_for(cpuid);
+                base.map(|base| unsafe { base.byte_add(offset).cast().as_ref() })
+            }
+        }
+
+        impl ::core::ops::Deref for #access_ident {
+            type Target = #ty;
+
+            fn deref(&self) -> &Self::Target {
+                self.get_ref()
+            }
+        }
+
+        impl<T> ::core::convert::AsRef<T> for #access_ident
+        where
+            <Self as ::core::ops::Deref>::Target: ::core::convert::AsRef<T>,
+        {
+            fn as_ref(&self) -> &T {
+                use ::core::ops::Deref;
+
+                self.deref().as_ref()
+            }
+        }
+    }
+    .into()
+}
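
A hypothetical use of the attribute, matching how scheduler.rs and ready_queue.rs above apply these attributes (the variable name and call sites are made up; the expansion only compiles inside the kernel, where the `.percpu` section and `eonix_preempt` are available):

    #[arch::define_percpu]
    static TICKS: usize = 0;

    fn on_timer_tick() {
        // `usize` counts as "atomic-like", so these accessors skip the
        // preempt-disable guards that non-trivial types would get.
        TICKS.add(1);
        let _seen = TICKS.get();
    }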

+ 15 - 0
crates/eonix_preempt/src/lib.rs

@@ -65,3 +65,18 @@ macro_rules! assert_preempt_count_eq {
         );
     }};
 }
+
+#[unsafe(no_mangle)]
+pub extern "C" fn r_preempt_disable() {
+    disable();
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn r_preempt_enable() {
+    enable();
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn r_preempt_count() -> usize {
+    count()
+}

+ 0 - 1
crates/eonix_runtime/Cargo.toml

@@ -12,7 +12,6 @@ eonix_sync = { path = "../eonix_sync" }
 pointers = { path = "../pointers" }
 
 intrusive-collections = "0.9.7"
-lazy_static = { version = "1.5.0", features = ["spin_no_std"] }
 
 [features]
 default = []

+ 7 - 19
crates/eonix_runtime/src/executor.rs

@@ -4,7 +4,7 @@ mod output_handle;
 mod stack;
 
 use crate::{
-    run::{Contexted, PinRun, RunState},
+    run::{Contexted, Run, RunState},
     scheduler::Scheduler,
     task::Task,
 };
@@ -21,7 +21,7 @@ pub use execute_status::ExecuteStatus;
 pub use output_handle::OutputHandle;
 pub use stack::Stack;
 
-/// An `Executor` executes a `PinRun` object in a separate thread of execution
+/// An `Executor` executes a `Run` object in a separate thread of execution
 /// where we have a dedicated stack and context.
 pub trait Executor: Send {
     fn progress(&self) -> ExecuteStatus;
@@ -29,7 +29,7 @@ pub trait Executor: Send {
 
 struct RealExecutor<S, R>
 where
-    R: PinRun + Send + Contexted + 'static,
+    R: Run + Send + Contexted + 'static,
     R::Output: Send,
 {
     _stack: S,
@@ -40,7 +40,7 @@ where
 
 impl<S, R> RealExecutor<S, R>
 where
-    R: PinRun + Send + Contexted + 'static,
+    R: Run + Send + Contexted + 'static,
     R::Output: Send,
 {
     extern "C" fn execute(self: Pin<&Self>) -> ! {
@@ -59,21 +59,9 @@ where
                 let mut pinned_runnable =
                     unsafe { Pin::new_unchecked(&mut *(runnable_pointer as *mut R)) };
 
-                match pinned_runnable.as_mut().pinned_run(&waker) {
+                match pinned_runnable.as_mut().run(&waker) {
                     RunState::Finished(output) => break output,
-                    RunState::Running => {
-                        if Task::current().is_runnable() {
-                            continue;
-                        }
-
-                        // We need to set the preempt count to 0 to allow preemption.
-                        eonix_preempt::disable();
-
-                        // SAFETY: We are in the scheduler context and preemption is disabled.
-                        unsafe { Scheduler::goto_scheduler(&Task::current().execution_context) };
-
-                        eonix_preempt::enable();
-                    }
+                    RunState::Running => Task::park(),
                 }
             };
 
@@ -96,7 +84,7 @@ where
 impl<S, R> Executor for RealExecutor<S, R>
 where
     S: Send,
-    R: PinRun + Contexted + Send,
+    R: Run + Contexted + Send,
     R::Output: Send,
 {
     fn progress(&self) -> ExecuteStatus {

+ 3 - 3
crates/eonix_runtime/src/executor/builder.rs

@@ -1,7 +1,7 @@
 use super::{Executor, OutputHandle, RealExecutor, Stack};
 use crate::{
     context::ExecutionContext,
-    run::{Contexted, PinRun},
+    run::{Contexted, Run},
 };
 use alloc::{boxed::Box, sync::Arc};
 use core::{pin::Pin, sync::atomic::AtomicBool};
@@ -15,7 +15,7 @@ pub struct ExecutorBuilder<S, R> {
 impl<S, R> ExecutorBuilder<S, R>
 where
     S: Stack,
-    R: PinRun + Contexted + Send + 'static,
+    R: Run + Contexted + Send + 'static,
     R::Output: Send,
 {
     pub fn new() -> Self {
@@ -48,7 +48,7 @@ where
         let mut execution_context = ExecutionContext::new();
         let output_handle = OutputHandle::new();
 
-        execution_context.set_sp(stack.get_bottom() as *const _ as _);
+        execution_context.set_sp(stack.get_bottom().addr().get() as _);
 
         let executor = Box::pin(RealExecutor {
             _stack: stack,

+ 3 - 1
crates/eonix_runtime/src/executor/stack.rs

@@ -1,4 +1,6 @@
+use core::ptr::NonNull;
+
 pub trait Stack: Sized + Send {
     fn new() -> Self;
-    fn get_bottom(&self) -> &();
+    fn get_bottom(&self) -> NonNull<()>;
 }

+ 5 - 10
crates/eonix_runtime/src/ready_queue.rs

@@ -2,8 +2,8 @@ use crate::task::Task;
 use alloc::{collections::VecDeque, sync::Arc};
 use eonix_sync::Spin;
 
-#[arch::define_percpu]
-static READYQUEUE: Option<Spin<FifoReadyQueue>> = None;
+#[arch::define_percpu_shared]
+static READYQUEUE: Spin<FifoReadyQueue> = Spin::new(FifoReadyQueue::new());
 
 pub trait ReadyQueue {
     fn get(&mut self) -> Option<Arc<Task>>;
@@ -33,14 +33,9 @@ impl ReadyQueue for FifoReadyQueue {
 }
 
 pub fn local_rq() -> &'static Spin<dyn ReadyQueue> {
-    // SAFETY: When we use ReadyQueue on this CPU, we will lock it with `lock_irq()`
-    //         and if we use ReadyQueue on other CPU, we won't be able to touch it on this CPU.
-    //         So no issue here.
-    unsafe { READYQUEUE.as_ref() }
-        .as_ref()
-        .expect("ReadyQueue should be initialized")
+    &*READYQUEUE
 }
 
-pub fn init_local_rq() {
-    READYQUEUE.set(Some(Spin::new(FifoReadyQueue::new())));
+pub fn cpu_rq(cpuid: usize) -> &'static Spin<dyn ReadyQueue> {
+    READYQUEUE.get_for_cpu(cpuid).expect("CPU not found")
 }

+ 3 - 29
crates/eonix_runtime/src/run.rs

@@ -21,40 +21,14 @@ pub trait Contexted {
 pub trait Run {
     type Output;
 
-    fn run(&mut self, waker: &Waker) -> RunState<Self::Output>;
+    fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output>;
 
-    fn join(&mut self, waker: &Waker) -> Self::Output {
+    fn join(mut self: Pin<&mut Self>, waker: &Waker) -> Self::Output {
         loop {
-            match self.run(waker) {
+            match self.as_mut().run(waker) {
                 RunState::Running => continue,
                 RunState::Finished(output) => break output,
             }
         }
     }
 }
-
-pub trait PinRun {
-    type Output;
-
-    fn pinned_run(self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output>;
-
-    fn pinned_join(mut self: Pin<&mut Self>, waker: &Waker) -> Self::Output {
-        loop {
-            match self.as_mut().pinned_run(waker) {
-                RunState::Running => continue,
-                RunState::Finished(output) => break output,
-            }
-        }
-    }
-}
-
-impl<R> Run for R
-where
-    R: PinRun + Unpin,
-{
-    type Output = R::Output;
-
-    fn run(&mut self, waker: &Waker) -> RunState<Self::Output> {
-        Pin::new(self).pinned_run(waker)
-    }
-}

+ 3 - 3
crates/eonix_runtime/src/run/future_run.rs

@@ -1,4 +1,4 @@
-use super::{Contexted, PinRun, RunState};
+use super::{Contexted, Run, RunState};
 use core::{
     pin::Pin,
     task::{Context, Poll, Waker},
@@ -16,13 +16,13 @@ where
 }
 
 impl<F> Contexted for FutureRun<F> where F: Future {}
-impl<F> PinRun for FutureRun<F>
+impl<F> Run for FutureRun<F>
 where
     F: Future + 'static,
 {
     type Output = F::Output;
 
-    fn pinned_run(self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output> {
+    fn run(self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output> {
         let mut future = unsafe { self.map_unchecked_mut(|me| &mut me.0) };
         let mut context = Context::from_waker(waker);
 

+ 57 - 82
crates/eonix_runtime/src/scheduler.rs

@@ -1,24 +1,21 @@
 use crate::{
     context::ExecutionContext,
     executor::{ExecuteStatus, OutputHandle, Stack},
-    ready_queue::{init_local_rq, local_rq, ReadyQueue},
-    run::{Contexted, PinRun},
+    ready_queue::{cpu_rq, local_rq},
+    run::{Contexted, Run},
     task::{Task, TaskAdapter, TaskHandle},
 };
 use alloc::sync::Arc;
 use core::{
-    future::Future,
     mem::forget,
-    pin::Pin,
     ptr::NonNull,
     sync::atomic::{compiler_fence, Ordering},
-    task::{Context, Poll, Waker},
+    task::Waker,
 };
 use eonix_log::println_trace;
 use eonix_preempt::assert_preempt_count_eq;
-use eonix_sync::Spin;
+use eonix_sync::{LazyLock, Spin};
 use intrusive_collections::RBTree;
-use lazy_static::lazy_static;
 use pointers::BorrowedArc;
 
 #[arch::define_percpu]
@@ -27,9 +24,8 @@ static CURRENT_TASK: Option<NonNull<Task>> = None;
 #[arch::define_percpu]
 static LOCAL_SCHEDULER_CONTEXT: ExecutionContext = ExecutionContext::new();
 
-lazy_static! {
-    static ref TASKS: Spin<RBTree<TaskAdapter>> = Spin::new(RBTree::new(TaskAdapter::new()));
-}
+static TASKS: LazyLock<Spin<RBTree<TaskAdapter>>> =
+    LazyLock::new(|| Spin::new(RBTree::new(TaskAdapter::new())));
 
 pub struct Scheduler;
 
@@ -88,8 +84,6 @@ impl Scheduler {
     where
         S: Stack,
     {
-        init_local_rq();
-
         let stack = S::new();
 
         unsafe {
@@ -97,7 +91,7 @@ impl Scheduler {
             // SAFETY: Preemption is disabled.
             let context: &mut ExecutionContext = LOCAL_SCHEDULER_CONTEXT.as_mut();
             context.set_ip(local_scheduler as _);
-            context.set_sp(stack.get_bottom() as *const _ as usize);
+            context.set_sp(stack.get_bottom().addr().get() as usize);
             eonix_preempt::enable();
         }
 
@@ -140,22 +134,39 @@ impl Scheduler {
         unsafe { TASKS.lock().cursor_mut_from_ptr(task as *const _).remove() };
     }
 
-    fn select_rq_for_task(&self, _task: &Task) -> &'static Spin<dyn ReadyQueue> {
-        // TODO: Select an appropriate ready queue.
-        local_rq()
+    fn select_cpu_for_task(&self, task: &Task) -> usize {
+        task.cpu.load(Ordering::Relaxed) as _
     }
 
     pub fn activate(&self, task: &Arc<Task>) {
-        if !task.on_rq.swap(true, Ordering::AcqRel) {
-            let rq = self.select_rq_for_task(&task);
-            rq.lock_irq().put(task.clone());
+        // Only one cpu can be activating the task at a time.
+        // TODO: Add some checks.
+
+        if task.on_rq.swap(true, Ordering::Acquire) {
+            // Lock the rq and check whether the task is on the rq again.
+            let cpuid = task.cpu.load(Ordering::Acquire);
+            let mut rq = cpu_rq(cpuid as _).lock_irq();
+
+            if !task.on_rq.load(Ordering::Acquire) {
+                // Task has just got off the rq. Put it back.
+                rq.put(task.clone());
+            } else {
+                // Task is already on the rq. Do nothing.
+                return;
+            }
+        } else {
+            // Task not on some rq. Select one and put it here.
+            let cpu = self.select_cpu_for_task(&task);
+            let mut rq = cpu_rq(cpu).lock_irq();
+            task.cpu.store(cpu as _, Ordering::Release);
+            rq.put(task.clone());
         }
     }
 
     pub fn spawn<S, R>(&self, runnable: R) -> JoinHandle<R::Output>
     where
         S: Stack + 'static,
-        R: PinRun + Contexted + Send + 'static,
+        R: Run + Contexted + Send + 'static,
         R::Output: Send + 'static,
     {
         let TaskHandle {
@@ -191,81 +202,44 @@ impl Scheduler {
         }
         eonix_preempt::enable();
     }
-
-    pub async fn yield_now() {
-        struct Yield(bool);
-
-        impl Future for Yield {
-            type Output = ();
-
-            fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-                match *self {
-                    Yield(true) => Poll::Ready(()),
-                    Yield(false) => {
-                        self.set(Yield(true));
-                        cx.waker().wake_by_ref();
-                        Poll::Pending
-                    }
-                }
-            }
-        }
-
-        Yield(false).await
-    }
-
-    pub async fn sleep() {
-        struct Sleep(bool);
-
-        impl Future for Sleep {
-            type Output = ();
-
-            fn poll(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
-                match *self {
-                    Sleep(true) => Poll::Ready(()),
-                    Sleep(false) => {
-                        self.set(Sleep(true));
-                        Poll::Pending
-                    }
-                }
-            }
-        }
-
-        Sleep(false).await
-    }
 }
 
 extern "C" fn local_scheduler() -> ! {
     loop {
         assert_preempt_count_eq!(1, "Scheduler::idle_task");
+        let mut rq = local_rq().lock_irq();
+
         let previous_task = CURRENT_TASK
             .get()
             .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) });
-        let next_task = local_rq().lock().get();
+        let next_task = rq.get();
 
         match (previous_task, next_task) {
             (None, None) => {
                 // Nothing to do, halt the cpu and rerun the loop.
+                drop(rq);
                 arch::halt();
                 continue;
             }
             (None, Some(next)) => {
                 CURRENT_TASK.set(NonNull::new(Arc::into_raw(next) as *mut _));
             }
-            (Some(previous), None) if previous.is_runnable() => {
-                // Previous thread is `Running`, return to the current running thread.
-                println_trace!(
-                    "trace_scheduler",
-                    "Returning to task id({}) without doing context switch",
-                    previous.id
-                );
-
-                CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _));
-            }
             (Some(previous), None) => {
-                // Nothing to do, halt the cpu and rerun the loop.
-                CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _));
-                arch::halt();
-                continue;
+                if previous.state.is_running() {
+                    // Previous thread is `Running`, return to the current running thread.
+                    println_trace!(
+                        "trace_scheduler",
+                        "Returning to task id({}) without doing context switch",
+                        previous.id
+                    );
+                    CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _));
+                } else {
+                    // Nothing to do, halt the cpu and rerun the loop.
+                    CURRENT_TASK.set(NonNull::new(Arc::into_raw(previous) as *mut _));
+                    drop(rq);
+                    arch::halt();
+                    continue;
+                }
             }
             (Some(previous), Some(next)) => {
                 println_trace!(
@@ -277,13 +251,9 @@ extern "C" fn local_scheduler() -> ! {
 
                 debug_assert_ne!(previous.id, next.id, "Switching to the same task");
 
-                let mut rq = local_rq().lock();
-                if previous.is_runnable() {
+                if previous.state.is_running() || !previous.state.try_park() {
                     rq.put(previous);
                 } else {
-                    // TODO!!!!!!!!!: There is a race condition here if we reach here and there
-                    // is another thread waking the task up. They might read `on_rq` == true so
-                    // the task will never be waken up.
                     previous.on_rq.store(false, Ordering::Release);
                 }
 
@@ -291,9 +261,14 @@ extern "C" fn local_scheduler() -> ! {
             }
         }
 
+        drop(rq);
         // TODO: We can move the release of finished tasks to some worker thread.
         if let ExecuteStatus::Finished = Task::current().run() {
-            Scheduler::remove_task(&Task::current());
+            let current = CURRENT_TASK
+                .swap(None)
+                .map(|ptr| unsafe { Arc::from_raw(ptr.as_ptr()) })
+                .expect("Current task should be present");
+            Scheduler::remove_task(&current);
         }
     }
 }

+ 100 - 71
crates/eonix_runtime/src/task.rs

@@ -4,16 +4,17 @@ mod task_state;
 use crate::{
     context::ExecutionContext,
     executor::{ExecuteStatus, Executor, ExecutorBuilder, OutputHandle, Stack},
-    run::{Contexted, PinRun},
+    run::{Contexted, Run},
     scheduler::Scheduler,
 };
 use alloc::{boxed::Box, sync::Arc, task::Wake};
 use atomic_unique_refcell::AtomicUniqueRefCell;
 use core::{
-    pin::Pin,
+    pin::{pin, Pin},
     sync::atomic::{AtomicBool, AtomicU32, Ordering},
-    task::Waker,
+    task::{Context, Poll, Waker},
 };
+use eonix_preempt::assert_preempt_enabled;
 use eonix_sync::Spin;
 use intrusive_collections::RBTreeAtomicLink;
 use task_state::TaskState;
@@ -23,8 +24,6 @@ pub use adapter::TaskAdapter;
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct TaskId(u32);
 
-pub struct UniqueWaker(Arc<Task>);
-
 pub struct TaskHandle<Output>
 where
     Output: Send,
@@ -34,35 +33,49 @@ where
 }
 
 /// A `Task` represents a schedulable unit.
+///
+/// Initial: state = Running, unparked = false
+///
+/// Task::park() => swap state <- Parking, assert prev == Running
+///              => swap unparked <- false
+///              -> true => store state <- Running => return
+///              -> false => goto scheduler => get rq lock => load state
+///                                                        -> Running => enqueue
+///                                                        -> Parking => cmpxchg Parking -> Parked
+///                                                                   -> Running => enqueue
+///                                                                   -> Parking => on_rq <- false
+///                                                                   -> Parked => ???
+///
+/// Task::unpark() => swap unparked <- true
+///                -> true => return
+///                -> false => swap state <- Running
+///                         -> Running => return
+///                         -> Parking | Parked => Scheduler::activate
 pub struct Task {
     /// Unique identifier of the task.
     pub id: TaskId,
-    /// Whether the task is on some run queue.
+    /// Whether the task is on some run queue (i.e., ready).
     pub(crate) on_rq: AtomicBool,
+    /// Whether someone has called `unpark` on this task.
+    pub(crate) unparked: AtomicBool,
+    /// The last cpu that the task was executed on.
+    /// If `on_rq` is `false`, we can't assume that this task is still on the cpu.
+    pub(crate) cpu: AtomicU32,
+    /// Task state.
+    pub(crate) state: TaskState,
     /// Task execution context.
     pub(crate) execution_context: ExecutionContext,
     /// Executor object.
     executor: AtomicUniqueRefCell<Option<Pin<Box<dyn Executor>>>>,
-    /// Task state.
-    state: TaskState,
     /// Link in the global task list.
     link_task_list: RBTreeAtomicLink,
 }
 
-impl<Output> TaskHandle<Output>
-where
-    Output: Send,
-{
-    pub fn waker(&self) -> Waker {
-        Waker::from(self.task.clone())
-    }
-}
-
 impl Task {
     pub fn new<S, R>(runnable: R) -> TaskHandle<R::Output>
     where
         S: Stack + 'static,
-        R: PinRun + Contexted + Send + 'static,
+        R: Run + Contexted + Send + 'static,
         R::Output: Send + 'static,
     {
         static ID: AtomicU32 = AtomicU32::new(0);
@@ -75,9 +88,11 @@ impl Task {
         let task = Arc::new(Self {
             id: TaskId(ID.fetch_add(1, Ordering::Relaxed)),
             on_rq: AtomicBool::new(false),
+            unparked: AtomicBool::new(false),
+            cpu: AtomicU32::new(0),
+            state: TaskState::new(TaskState::RUNNING),
             executor: AtomicUniqueRefCell::new(Some(executor)),
             execution_context,
-            state: TaskState::new(TaskState::RUNNING),
             link_task_list: RBTreeAtomicLink::new(),
         });
 
@@ -87,41 +102,6 @@ impl Task {
         }
     }
 
-    pub fn is_runnable(&self) -> bool {
-        self.state.is_runnable()
-    }
-
-    pub(super) fn set_usleep(&self) {
-        let prev_state = self.state.swap(TaskState::USLEEP);
-        assert_eq!(
-            prev_state,
-            TaskState::RUNNING,
-            "Trying to set task {} usleep that is not running",
-            self.id.0
-        );
-    }
-
-    pub fn usleep(self: &Arc<Self>) -> Arc<UniqueWaker> {
-        // No need to dequeue. We have proved that the task is running so not in the queue.
-        self.set_usleep();
-
-        Arc::new(UniqueWaker(self.clone()))
-    }
-
-    pub fn isleep(self: &Arc<Self>) -> Arc<Self> {
-        // No need to dequeue. We have proved that the task is running so not in the queue.
-        let prev_state = self.state.cmpxchg(TaskState::RUNNING, TaskState::ISLEEP);
-
-        assert_eq!(
-            prev_state,
-            TaskState::RUNNING,
-            "Trying to sleep task {} that is not running",
-            self.id.0
-        );
-
-        self.clone()
-    }
-
     pub fn run(&self) -> ExecuteStatus {
         let mut executor_borrow = self.executor.borrow();
 
@@ -133,39 +113,88 @@ impl Task {
 
         if let ExecuteStatus::Finished = executor.progress() {
             executor_borrow.take();
-            self.set_usleep();
             ExecuteStatus::Finished
         } else {
             ExecuteStatus::Executing
         }
     }
-}
 
-impl Wake for Task {
-    fn wake(self: Arc<Self>) {
-        self.wake_by_ref();
+    pub fn unpark(self: &Arc<Self>) {
+        if self.unparked.swap(true, Ordering::Release) {
+            return;
+        }
+
+        eonix_preempt::disable();
+
+        match self.state.swap(TaskState::RUNNING) {
+            TaskState::RUNNING => {}
+            TaskState::PARKED | TaskState::PARKING => {
+                // We are waking up from sleep or someone else is parking this task.
+                // Try to wake it up.
+                Scheduler::get().activate(self);
+            }
+            _ => unreachable!(),
+        }
+
+        eonix_preempt::enable();
     }
 
-    fn wake_by_ref(self: &Arc<Self>) {
-        match self.state.cmpxchg(TaskState::ISLEEP, TaskState::RUNNING) {
-            TaskState::RUNNING | TaskState::USLEEP => return,
-            TaskState::ISLEEP => Scheduler::get().activate(self),
-            state => panic!("Invalid transition from state {:?} to `Running`", state),
+    pub fn park() {
+        eonix_preempt::disable();
+        Self::park_preempt_disabled();
+    }
+
+    /// Park the current task with `preempt::count() == 1`.
+    pub fn park_preempt_disabled() {
+        let task = Task::current();
+
+        let old_state = task.state.swap(TaskState::PARKING);
+        assert_eq!(
+            old_state,
+            TaskState::RUNNING,
+            "Parking a task that is not running."
+        );
+
+        if task.unparked.swap(false, Ordering::AcqRel) {
+            // Someone has called `unpark` on this task previously.
+            task.state.swap(TaskState::RUNNING);
+        } else {
+            unsafe {
+                // SAFETY: Preemption is disabled.
+                Scheduler::goto_scheduler(&Task::current().execution_context)
+            };
+            assert!(task.unparked.swap(false, Ordering::Acquire));
+        }
+
+        eonix_preempt::enable();
+    }
+
+    pub fn block_on<F>(future: F) -> F::Output
+    where
+        F: Future,
+    {
+        assert_preempt_enabled!("block_on() must be called with preemption enabled");
+
+        let waker = Waker::from(Task::current().clone());
+        let mut context = Context::from_waker(&waker);
+        let mut future = pin!(future);
+
+        loop {
+            if let Poll::Ready(output) = future.as_mut().poll(&mut context) {
+                break output;
+            }
+
+            Task::park();
         }
     }
 }
 
-impl Wake for UniqueWaker {
+impl Wake for Task {
     fn wake(self: Arc<Self>) {
         self.wake_by_ref();
     }
 
     fn wake_by_ref(self: &Arc<Self>) {
-        let Self(task) = &**self;
-
-        let prev_state = task.state.swap(TaskState::RUNNING);
-        assert_eq!(prev_state, TaskState::USLEEP);
-
-        Scheduler::get().activate(task);
+        self.unpark();
     }
 }
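
The `unparked` flag is what closes the lost-wakeup window that the removed `on_rq` comment in scheduler.rs warned about: `park` publishes `PARKING`, then consumes a pending `unpark` before it actually switches away, and `unpark` publishes the flag before trying to reactivate the task. Below is a standalone model of just that flag handshake, with `std` threads standing in for the scheduler and a spin loop standing in for the context switch.

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::thread;

    struct Parker {
        unparked: AtomicBool,
    }

    impl Parker {
        fn park(&self) {
            // A wakeup that arrived before we got here is consumed and we
            // return immediately instead of sleeping (cf. `Task::park`).
            if self.unparked.swap(false, Ordering::AcqRel) {
                return;
            }
            // The real code switches to the scheduler here; we just spin
            // until `unpark` publishes the flag.
            while !self.unparked.swap(false, Ordering::AcqRel) {
                thread::yield_now();
            }
        }

        fn unpark(&self) {
            // Publish the flag first; a concurrent `park` either sees it and
            // returns, or (in the real code) the task is reactivated.
            self.unparked.store(true, Ordering::Release);
        }
    }

    fn main() {
        let parker = Arc::new(Parker { unparked: AtomicBool::new(false) });
        let waker = Arc::clone(&parker);
        let t = thread::spawn(move || waker.unpark());
        parker.park();
        t.join().unwrap();
    }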

+ 1 - 2
crates/eonix_runtime/src/task/adapter.rs

@@ -1,8 +1,7 @@
+use super::{Task, TaskId};
 use alloc::sync::Arc;
 use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink};
 
-use super::{Task, TaskId};
-
 intrusive_adapter!(pub TaskAdapter = Arc<Task>: Task { link_task_list: RBTreeAtomicLink });
 
 impl<'a> KeyAdapter<'a> for TaskAdapter {

+ 16 - 9
crates/eonix_runtime/src/task/task_state.rs

@@ -5,24 +5,31 @@ pub struct TaskState(AtomicU32);
 
 impl TaskState {
     pub const RUNNING: u32 = 0;
-    pub const ISLEEP: u32 = 1;
-    pub const USLEEP: u32 = 2;
+    pub const PARKING: u32 = 1;
+    pub const PARKED: u32 = 2;
 
-    pub const fn new(state: u32) -> Self {
+    pub(crate) const fn new(state: u32) -> Self {
         Self(AtomicU32::new(state))
     }
 
-    pub fn swap(&self, state: u32) -> u32 {
+    pub(crate) fn swap(&self, state: u32) -> u32 {
         self.0.swap(state, Ordering::AcqRel)
     }
 
-    pub fn cmpxchg(&self, current: u32, new: u32) -> u32 {
-        self.0
-            .compare_exchange(current, new, Ordering::AcqRel, Ordering::Relaxed)
-            .unwrap_or_else(|x| x)
+    pub(crate) fn try_park(&self) -> bool {
+        match self.0.compare_exchange(
+            TaskState::PARKING,
+            TaskState::PARKED,
+            Ordering::AcqRel,
+            Ordering::Acquire,
+        ) {
+            Ok(_) => true,
+            Err(TaskState::RUNNING) => false,
+            Err(_) => unreachable!("Invalid task state while trying to park."),
+        }
     }
 
-    pub fn is_runnable(&self) -> bool {
+    pub(crate) fn is_running(&self) -> bool {
         self.0.load(Ordering::Acquire) == Self::RUNNING
     }
 }

+ 6 - 0
crates/eonix_sync/Cargo.toml

@@ -4,4 +4,10 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+arch = { path = "../../arch" }
 eonix_preempt = { path = "../eonix_preempt" }
+intrusive-collections = "0.9.7"
+
+[features]
+default = []
+no_check_locked = []

+ 10 - 153
crates/eonix_sync/src/guard.rs

@@ -1,159 +1,16 @@
-use crate::{Lock, LockStrategy};
-use core::{
-    mem::ManuallyDrop,
-    ops::{Deref, DerefMut},
-    ptr,
-};
+pub trait UnlockableGuard {
+    type Unlocked: UnlockedGuard<Guard = Self>;
 
-pub struct Guard<'a, T, S, L, const WRITE: bool = true>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    pub(crate) lock: &'a Lock<T, L>,
-    pub(crate) strategy_data: &'a S::StrategyData,
-    pub(crate) context: S::GuardContext,
-}
-
-pub struct UnlockedGuard<'a, T, S, L, const WRITE: bool = true>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    pub(crate) lock: &'a Lock<T, L>,
-    pub(crate) strategy_data: &'a S::StrategyData,
-    pub(crate) context: S::GuardContext,
-}
-
-impl<'a, T, S, L, const W: bool> Guard<'a, T, S, L, W>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
     #[must_use = "The returned `UnlockedGuard` must be used to relock the lock."]
-    pub fn unlock(mut self) -> UnlockedGuard<'a, T, S, L, W> {
-        unsafe { S::do_temporary_unlock(&self.strategy_data, &mut self.context) }
-
-        UnlockedGuard {
-            lock: self.lock,
-            strategy_data: self.strategy_data,
-            context: {
-                let me = ManuallyDrop::new(self);
-                // SAFETY: We are using `ManuallyDrop` to prevent the destructor from running.
-                unsafe { ptr::read(&me.context) }
-            },
-        }
-    }
-
-    /// # Safety
-    /// This function is unsafe because it allows you to unlock the lock without
-    /// dropping the guard. Using the guard after calling this function is
-    /// undefined behavior.
-    pub unsafe fn force_unlock(&mut self) {
-        unsafe { S::do_temporary_unlock(&self.strategy_data, &mut self.context) }
-    }
-
-    /// # Safety
-    /// Calling this function twice on a force unlocked guard will cause deadlocks.
-    pub unsafe fn force_relock(&mut self) {
-        unsafe { S::do_relock(&self.strategy_data, &mut self.context) }
-    }
-}
-
-impl<'a, T, S, L, const W: bool> UnlockedGuard<'a, T, S, L, W>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    #[must_use = "Throwing away the relocked guard is pointless."]
-    pub fn relock(mut self) -> Guard<'a, T, S, L, W> {
-        unsafe { S::do_relock(&self.strategy_data, &mut self.context) }
-
-        Guard {
-            lock: self.lock,
-            strategy_data: self.strategy_data,
-            context: {
-                let me = ManuallyDrop::new(self);
-                // SAFETY: We are using `ManuallyDrop` to prevent the destructor from running.
-                unsafe { ptr::read(&me.context) }
-            },
-        }
-    }
+    fn unlock(self) -> Self::Unlocked;
 }
 
-impl<T, S, L, const W: bool> Deref for Guard<'_, T, S, L, W>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    type Target = T;
+/// # Safety
+/// Implementors of this trait MUST ensure that the lock is left correctly unlocked
+/// even if the unlocked guard is stateful and is dropped accidentally.
+pub unsafe trait UnlockedGuard: Send {
+    type Guard: UnlockableGuard;
 
-    fn deref(&self) -> &Self::Target {
-        unsafe { &*self.lock.value.get() }
-    }
-}
-
-impl<T, S, L> DerefMut for Guard<'_, T, S, L, true>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        unsafe { &mut *self.lock.value.get() }
-    }
-}
-
-impl<T, S, L, const WRITE: bool> AsRef<T> for Guard<'_, T, S, L, WRITE>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    fn as_ref(&self) -> &T {
-        unsafe { &*self.lock.value.get() }
-    }
-}
-
-impl<T, S, L> AsMut<T> for Guard<'_, T, S, L, true>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    fn as_mut(&mut self) -> &mut T {
-        unsafe { &mut *self.lock.value.get() }
-    }
-}
-
-impl<T, S, L, const WRITE: bool> Drop for UnlockedGuard<'_, T, S, L, WRITE>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    fn drop(&mut self) {
-        // SAFETY: If we are stubborn enough to drop the unlocked guard, relock it and
-        //         then unlock it again to prevent anything weird from happening.
-        unsafe {
-            S::do_relock(&self.strategy_data, &mut self.context);
-            S::do_unlock(&self.strategy_data, &mut self.context);
-        }
-    }
-}
-
-impl<T, S, L, const WRITE: bool> Drop for Guard<'_, T, S, L, WRITE>
-where
-    T: ?Sized,
-    S: LockStrategy,
-    L: LockStrategy,
-{
-    fn drop(&mut self) {
-        unsafe { S::do_unlock(&self.strategy_data, &mut self.context) }
-    }
+    #[must_use = "Throwing away the relocked guard is pointless."]
+    fn relock(self) -> impl Future<Output = Self::Guard> + Send;
 }
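
The intended call shape for the new trait pair, sketched as a hypothetical generic helper using the re-exports from the crate root: the lock is released across some awaited work and then reacquired, and the `#[must_use]` annotations keep the unlocked and relocked guards from being dropped by accident.

    use core::future::Future;
    use eonix_sync::{UnlockableGuard, UnlockedGuard};

    async fn with_lock_released<G, W>(guard: G, work: W) -> G
    where
        G: UnlockableGuard,
        W: Future<Output = ()> + Send,
    {
        let unlocked = guard.unlock();
        work.await;
        // `relock()` returns a future, so reacquisition itself may suspend.
        unlocked.relock().await
    }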

+ 172 - 0
crates/eonix_sync/src/lazy_lock.rs

@@ -0,0 +1,172 @@
+use crate::{Relax, SpinRelax};
+use core::{
+    cell::UnsafeCell,
+    marker::PhantomData,
+    ops::Deref,
+    sync::atomic::{AtomicU8, Ordering},
+};
+
+enum LazyState<T, F>
+where
+    F: FnOnce() -> T,
+{
+    Uninitialized(F),
+    Initializing,
+    Initialized(T),
+}
+
+pub struct LazyLock<T, F = fn() -> T, R = SpinRelax>
+where
+    F: FnOnce() -> T,
+    R: Relax,
+{
+    value: UnsafeCell<LazyState<T, F>>,
+    state: AtomicU8,
+    _phantom: PhantomData<R>,
+}
+
+unsafe impl<T, F, R> Sync for LazyLock<T, F, R>
+where
+    T: Send + Sync,
+    F: FnOnce() -> T,
+    F: Send,
+    R: Relax,
+{
+}
+
+impl<T, F, R> LazyLock<T, F, R>
+where
+    F: FnOnce() -> T,
+    R: Relax,
+{
+    const UNINITIALIZED: u8 = 0;
+    const INITIALIZING: u8 = 1;
+    const INITIALIZED: u8 = 2;
+
+    pub const fn new(init: F) -> Self {
+        Self {
+            value: UnsafeCell::new(LazyState::Uninitialized(init)),
+            state: AtomicU8::new(Self::UNINITIALIZED),
+            _phantom: PhantomData,
+        }
+    }
+
+    /// # Safety
+    /// The caller must synchronize with the writer that initialized the value, or stale data may be read.
+    unsafe fn get_initialized_value(&self) -> Option<&T> {
+        // SAFETY: We're synced with the cpu that initialized it.
+        if let LazyState::Initialized(value) = unsafe { &*self.value.get() } {
+            Some(value)
+        } else {
+            None
+        }
+    }
+
+    /// Performs the initialization of the value and leaves `self.state` untouched. The caller
+    /// should set the state to `INITIALIZED` after calling this function.
+    ///
+    /// # Safety
+    /// This function is unsafe because concurrent calls would result in undefined behavior.
+    /// It must be called exactly once, with `self.state == INITIALIZING`.
+    unsafe fn do_initialization(&self) {
+        // SAFETY: We are the only thread that can access the value initializer.
+        let stateref = unsafe { &mut *self.value.get() };
+        let mut state = LazyState::Initializing;
+        core::mem::swap(stateref, &mut state);
+
+        if let LazyState::Uninitialized(init_func) = state {
+            state = LazyState::Initialized(init_func());
+        } else {
+            unreachable!("Invalid LazyLock state.");
+        };
+
+        core::mem::swap(stateref, &mut state);
+    }
+
+    /// Spin until the value is initialized. Guarantees that the initialized value is
+    /// visible to the calling CPU.
+    fn spin_until_initialized(&self) {
+        while self.state.load(Ordering::Acquire) != Self::INITIALIZED {
+            R::relax();
+        }
+    }
+
+    /// Get an immutable reference to the wrapped value if it is initialized. Otherwise,
+    /// block until the value has been initialized (possibly by the caller itself).
+    pub fn get(&self) -> &T {
+        match self.state.load(Ordering::Acquire) {
+            Self::UNINITIALIZED => match self.state.compare_exchange(
+                Self::UNINITIALIZED,
+                Self::INITIALIZING,
+                Ordering::Acquire,
+                Ordering::Acquire,
+            ) {
+                Ok(_) => unsafe {
+                    // SAFETY: We are the only thread doing initialization.
+                    self.do_initialization();
+                    self.state.store(Self::INITIALIZED, Ordering::Release);
+                },
+                Err(Self::INITIALIZING) => self.spin_until_initialized(),
+                Err(Self::INITIALIZED) => {}
+                Err(_) => unreachable!("Invalid LazyLock state."),
+            },
+            Self::INITIALIZING => self.spin_until_initialized(),
+            Self::INITIALIZED => {}
+            _ => unreachable!("Invalid LazyLock state."),
+        }
+
+        unsafe {
+            // SAFETY: If we're the spin waiter, we're synced with the cpu that initialized
+            //         it using `Acquire`. If we're the one that initialized it, no
+            //         synchronization is needed.
+            self.get_initialized_value()
+                .expect("Value should be initialized.")
+        }
+    }
+
+    pub fn get_mut(&mut self) -> &mut T {
+        match self.state.load(Ordering::Acquire) {
+            Self::UNINITIALIZED => {
+                self.state.swap(Self::INITIALIZING, Ordering::Acquire);
+                // SAFETY: We are the only thread doing initialization.
+                unsafe {
+                    self.do_initialization();
+                }
+                self.state.store(Self::INITIALIZED, Ordering::Release);
+            }
+            Self::INITIALIZED => {}
+            Self::INITIALIZING => unreachable!("We should be the only one initializing it."),
+            _ => unreachable!("Invalid LazyLock state."),
+        }
+
+        if let LazyState::Initialized(value) = self.value.get_mut() {
+            value
+        } else {
+            unreachable!("Invalid LazyLock state.");
+        }
+    }
+}
+
+impl<T, F, R> Deref for LazyLock<T, F, R>
+where
+    F: FnOnce() -> T,
+    R: Relax,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.get()
+    }
+}
+
+impl<T, U, F, R> AsRef<U> for LazyLock<T, F, R>
+where
+    U: ?Sized,
+    F: FnOnce() -> T,
+    R: Relax,
+    <Self as Deref>::Target: AsRef<U>,
+{
+    fn as_ref(&self) -> &U {
+        self.deref().as_ref()
+    }
+}
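For orientation, here is a minimal usage sketch of the new `LazyLock` (not part of the diff; the static name and its contents are invented, and it assumes `eonix_sync` is available as a dependency):

use eonix_sync::LazyLock;

// Computed once, on first access, by whichever cpu gets there first.
static SQUARES: LazyLock<[u32; 8]> = LazyLock::new(|| {
    let mut table = [0u32; 8];
    let mut i = 0;
    while i < 8 {
        table[i] = (i * i) as u32;
        i += 1;
    }
    table
});

fn sum_of_squares() -> u32 {
    // `get()` initializes the value if necessary; `Deref` makes later reads terse.
    SQUARES.get().iter().sum()
}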

+ 17 - 7
crates/eonix_sync/src/lib.rs

@@ -1,13 +1,23 @@
 #![no_std]
 
 mod guard;
-mod lock;
+mod lazy_lock;
+mod locked;
+pub mod marker;
+mod mutex;
+mod rwlock;
 mod spin;
-mod strategy;
+mod wait_list;
 
-pub use guard::Guard;
-pub use lock::Lock;
-pub use spin::{IrqStrategy, SpinStrategy};
-pub use strategy::LockStrategy;
+pub use guard::{UnlockableGuard, UnlockedGuard};
+pub use lazy_lock::LazyLock;
+pub use locked::{AsProof, AsProofMut, Locked, Proof, ProofMut};
+pub use mutex::{Mutex, MutexGuard};
+pub use rwlock::{RwLock, RwLockReadGuard, RwLockWriteGuard};
+pub use spin::{
+    LoopRelax, Relax, Spin, SpinGuard, SpinIrqGuard, SpinRelax, UnlockedSpinGuard,
+    UnlockedSpinIrqGuard,
+};
+pub use wait_list::WaitList;
 
-pub type Spin<T> = Lock<T, SpinStrategy>;
+extern crate alloc;

+ 0 - 139
crates/eonix_sync/src/lock.rs

@@ -1,139 +0,0 @@
-use super::{spin::IrqStrategy, strategy::LockStrategy};
-use crate::Guard;
-use core::{cell::UnsafeCell, fmt};
-
-pub struct Lock<T, S>
-where
-    T: ?Sized,
-    S: LockStrategy,
-{
-    pub(crate) strategy_data: S::StrategyData,
-    pub(crate) value: UnsafeCell<T>,
-}
-
-// SAFETY: As long as the value protected by the lock is able to be shared between threads,
-//         the lock itself is also able to be shared between threads.
-unsafe impl<T, S> Send for Lock<T, S>
-where
-    T: ?Sized + Send,
-    S: LockStrategy,
-{
-}
-
-// SAFETY: As long as the value protected by the lock is able to be shared between threads,
-//         the lock will provide synchronization between threads.
-unsafe impl<T, S> Sync for Lock<T, S>
-where
-    T: ?Sized + Send,
-    S: LockStrategy,
-{
-}
-
-impl<T, S> Lock<T, S>
-where
-    S: LockStrategy,
-{
-    #[inline(always)]
-    pub fn new(value: T) -> Self {
-        Self {
-            strategy_data: S::new_data(),
-            value: UnsafeCell::new(value),
-        }
-    }
-}
-
-impl<T, S> Lock<T, S>
-where
-    T: ?Sized,
-    S: LockStrategy,
-{
-    pub fn is_locked(&self) -> bool {
-        unsafe { S::is_locked(&self.strategy_data) }
-    }
-
-    pub fn try_lock(&self) -> Option<Guard<T, S, S>> {
-        if !unsafe { S::is_locked(&self.strategy_data) } {
-            unsafe { S::try_lock(&self.strategy_data) }.map(|context| Guard {
-                lock: self,
-                strategy_data: &self.strategy_data,
-                context,
-            })
-        } else {
-            None
-        }
-    }
-
-    pub fn lock(&self) -> Guard<T, S, S> {
-        Guard {
-            lock: self,
-            strategy_data: &self.strategy_data,
-            context: unsafe { S::do_lock(&self.strategy_data) },
-        }
-    }
-
-    pub fn lock_irq(&self) -> Guard<T, IrqStrategy<S>, S> {
-        Guard {
-            lock: self,
-            strategy_data: &self.strategy_data,
-            context: unsafe { IrqStrategy::<S>::do_lock(&self.strategy_data) },
-        }
-    }
-
-    pub fn lock_shared(&self) -> Guard<T, S, S, false> {
-        Guard {
-            lock: self,
-            strategy_data: &self.strategy_data,
-            context: unsafe { S::do_lock_shared(&self.strategy_data) },
-        }
-    }
-
-    pub fn lock_shared_irq(&self) -> Guard<T, IrqStrategy<S>, S, false> {
-        Guard {
-            lock: self,
-            strategy_data: &self.strategy_data,
-            context: unsafe { IrqStrategy::<S>::do_lock(&self.strategy_data) },
-        }
-    }
-
-    pub fn get_mut(&mut self) -> &mut T {
-        unsafe { &mut *self.value.get() }
-    }
-}
-
-impl<T, S> fmt::Debug for Lock<T, S>
-where
-    T: fmt::Debug,
-    S: LockStrategy,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Lock")
-            .field("locked_value", &self.value)
-            .finish()
-    }
-}
-
-impl<T, S> Clone for Lock<T, S>
-where
-    T: Clone,
-    S: LockStrategy,
-{
-    fn clone(&self) -> Self {
-        Self {
-            strategy_data: S::new_data(),
-            value: UnsafeCell::new(self.lock_shared().clone()),
-        }
-    }
-}
-
-impl<T, S> Default for Lock<T, S>
-where
-    T: Default,
-    S: LockStrategy,
-{
-    fn default() -> Self {
-        Self {
-            strategy_data: S::new_data(),
-            value: Default::default(),
-        }
-    }
-}

+ 95 - 0
crates/eonix_sync/src/locked.rs

@@ -0,0 +1,95 @@
+mod proof;
+
+use core::{cell::UnsafeCell, fmt, ptr::NonNull};
+
+pub use proof::{AsProof, AsProofMut, Proof, ProofMut};
+
+/// A lock to protect a value of type `T` using the proof of access to some
+/// value of type `U`.
+pub struct Locked<T, U>
+where
+    U: ?Sized,
+{
+    inner: UnsafeCell<T>,
+    #[cfg(not(feature = "no_check_locked"))]
+    guard: NonNull<U>,
+    #[cfg(feature = "no_check_locked")]
+    _phantom: core::marker::PhantomData<NonNull<U>>,
+}
+
+/// SAFETY: The `Locked` type is safe to send across threads as long as
+/// the inner type `T` is `Send`. The `guard` pointer is not used to access
+/// the inner value, so no constraints are needed on it.
+unsafe impl<T, U> Send for Locked<T, U>
+where
+    T: Send,
+    U: ?Sized,
+{
+}
+
+/// SAFETY: The `Locked` type is safe to share across threads as long as
+/// the inner type `T` is `Send` and `Sync`. The `guard` pointer is not used
+/// to access the inner value, so no constraints are needed on it.
+unsafe impl<T, U> Sync for Locked<T, U>
+where
+    T: Send + Sync,
+    U: ?Sized,
+{
+}
+
+impl<T, U> Locked<T, U>
+where
+    U: ?Sized,
+{
+    pub const fn new(value: T, guard: &U) -> Self {
+        Self {
+            inner: UnsafeCell::new(value),
+            #[cfg(not(feature = "no_check_locked"))]
+            // SAFETY: The validity of the address is guaranteed by the borrow checker.
+            guard: unsafe { NonNull::new_unchecked(&raw const *guard as *mut U) },
+            #[cfg(feature = "no_check_locked")]
+            _phantom: core::marker::PhantomData,
+        }
+    }
+}
+
+impl<T, U> Locked<T, U>
+where
+    T: Send + Sync,
+    U: ?Sized,
+{
+    pub fn access<'a, 'b>(&'a self, _guard: Proof<'b, U>) -> &'a T
+    where
+        'b: 'a,
+    {
+        #[cfg(not(feature = "no_check_locked"))]
+        assert_eq!(self.guard, _guard.address, "Locked::access(): Wrong guard");
+        // SAFETY: The guard protects the shared access to the inner value.
+        unsafe { self.inner.get().as_ref().unwrap() }
+    }
+
+    pub fn access_mut<'a, 'b>(&'a self, _guard: ProofMut<'b, U>) -> &'a mut T
+    where
+        'b: 'a,
+    {
+        #[cfg(not(feature = "no_check_locked"))]
+        assert_eq!(
+            self.guard, _guard.address,
+            "Locked::access_mut(): Wrong guard"
+        );
+        // SAFETY: The guard protects the exclusive access to the inner value.
+        unsafe { self.inner.get().as_mut().unwrap() }
+    }
+}
+
+impl<T, U> fmt::Debug for Locked<T, U>
+where
+    U: ?Sized,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Locked")
+            .field("value", &self.inner)
+            .field("guard", &self.guard)
+            .finish()
+    }
+}
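As a rough illustration of how `Locked` ties a value to a proof of access, here is a hypothetical sketch (the `Stats` struct and `demo` function are invented; only the `eonix_sync` items are from this changeset):

use eonix_sync::{AsProof, AsProofMut, Locked};

struct Stats {
    generation: u64,
}

// `extra` may only be touched while access to `stats` can be proven.
fn demo(stats: &mut Stats) {
    let extra = Locked::new(0u64, &*stats);

    // A shared borrow of `stats` proves shared access to `extra`...
    let shared: &u64 = extra.access((&*stats).prove());
    assert_eq!(*shared, 0);

    // ...and an exclusive borrow proves exclusive access, with no second lock taken.
    stats.generation += 1;
    *extra.access_mut(stats.prove_mut()) += 1;
}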

+ 182 - 0
crates/eonix_sync/src/locked/proof.rs

@@ -0,0 +1,182 @@
+use core::{marker::PhantomData, ptr::NonNull};
+
+/// A proof of mutable access to a position in memory with lifetime `'pos`.
+///
+/// Note that this is just the proof of access; it does not by itself permit us
+/// to mutate the memory location.
+///
+/// We can choose whether to check the validity of the proof or not at runtime.
+pub struct ProofMut<'pos, T>
+where
+    T: ?Sized,
+{
+    pub(super) address: NonNull<T>,
+    _phantom: PhantomData<&'pos ()>,
+}
+
+/// A proof of immutable access to a position in memory with lifetime `'pos`.
+///
+/// We can choose whether to check the validity of the proof or not at runtime.
+pub struct Proof<'pos, T>
+where
+    T: ?Sized,
+{
+    pub(super) address: NonNull<T>,
+    _phantom: PhantomData<&'pos ()>,
+}
+
+/// A trait for types that can be converted to a proof of mutable access.
+///
+/// This is used to prove that a mutable reference is valid for the lifetime `'pos`
+/// through this object with lifetime `'guard`.
+///
+/// ## Safety
+/// This trait is unsafe because it allows the caller to create a proof of access
+/// to a memory location that may not be valid for the lifetime `'pos`. The implementer
+/// must ensure that the access to the memory location is valid for the lifetime `'pos`
+/// during the lifetime of the returned `Proof`. This is typically done by using a lock
+/// or other synchronization mechanism to ensure that the memory location is not accessed
+/// by others while the proof is being created.
+pub unsafe trait AsProofMut<'guard, 'pos, T>: 'guard
+where
+    T: ?Sized,
+{
+    fn prove_mut(&self) -> ProofMut<'pos, T>
+    where
+        'guard: 'pos;
+}
+
+/// A trait for types that can be converted to a proof of immutable access.
+///
+/// This is used to prove that an immutable reference is valid for the lifetime `'pos`
+/// through this object with lifetime `'guard`.
+///
+/// ## Safety
+/// This trait is unsafe because it allows the caller to create a proof of access
+/// to a memory location that may not be valid for the lifetime `'pos`. The implementer
+/// must ensure that the access to the memory location is valid for the lifetime `'pos`
+/// during the lifetime of the returned `Proof`. This is typically done by using a lock
+/// or other synchronization mechanism to ensure that the memory location is not accessed
+/// by others while the proof is being created.
+pub unsafe trait AsProof<'guard, 'pos, T>: 'guard
+where
+    T: ?Sized,
+{
+    fn prove(&self) -> Proof<'pos, T>
+    where
+        'guard: 'pos;
+}
+
+impl<T> Proof<'_, T>
+where
+    T: ?Sized,
+{
+    /// # Safety
+    /// The caller must ensure valid access for at least the lifetime `'pos`.
+    pub const unsafe fn new(address: *const T) -> Self {
+        Self {
+            // SAFETY: The validity of the reference is guaranteed by the caller.
+            address: unsafe { NonNull::new_unchecked(address as *mut _) },
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T> ProofMut<'_, T>
+where
+    T: ?Sized,
+{
+    /// # Safety
+    /// The caller must ensure valid access for at least the lifetime `'pos`.
+    pub const unsafe fn new(address: *mut T) -> Self {
+        Self {
+            // SAFETY: The validity of the reference is guaranteed by the caller.
+            address: unsafe { NonNull::new_unchecked(address as *mut _) },
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// Proof of mutable access to a position in memory can be duplicated.
+impl<T> Copy for ProofMut<'_, T> where T: ?Sized {}
+
+/// Proof of mutable access to a position in memory can be duplicated.
+impl<T> Clone for ProofMut<'_, T>
+where
+    T: ?Sized,
+{
+    fn clone(&self) -> Self {
+        Self {
+            address: self.address,
+            _phantom: self._phantom,
+        }
+    }
+}
+
+/// Proof of immutable access to a position in memory can be duplicated.
+impl<T> Copy for Proof<'_, T> where T: ?Sized {}
+
+/// Proof of immutable access to a position in memory can be duplicated.
+impl<T> Clone for Proof<'_, T>
+where
+    T: ?Sized,
+{
+    fn clone(&self) -> Self {
+        Self {
+            address: self.address,
+            _phantom: self._phantom,
+        }
+    }
+}
+
+/// SAFETY: The reference is valid for the lifetime `'guard`, so the access is valid
+/// for any lifetime `'pos` that `'guard` outlives.
+unsafe impl<'guard, 'pos, T> AsProofMut<'guard, 'pos, T> for &'guard mut T
+where
+    T: ?Sized,
+{
+    fn prove_mut(&self) -> ProofMut<'pos, T>
+    where
+        'guard: 'pos,
+    {
+        ProofMut {
+            // SAFETY: The validity of the reference is guaranteed by the borrow checker.
+            address: unsafe { NonNull::new_unchecked(&raw const **self as *mut _) },
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// SAFETY: The reference is valid for the lifetime `'guard`, so the access is valid
+/// for any lifetime `'pos` that `'guard` outlives.
+unsafe impl<'guard, 'pos, T> AsProof<'guard, 'pos, T> for &'guard T
+where
+    T: ?Sized,
+{
+    fn prove(&self) -> Proof<'pos, T>
+    where
+        'guard: 'pos,
+    {
+        Proof {
+            address: unsafe { NonNull::new_unchecked(&raw const **self as *mut _) },
+            _phantom: PhantomData,
+        }
+    }
+}
+
+/// SAFETY: The reference is valid for the lifetime `'guard`, so the access is valid
+/// for any lifetime `'pos` that `'guard` outlives.
+unsafe impl<'guard, 'pos, T> AsProof<'guard, 'pos, T> for &'guard mut T
+where
+    T: ?Sized,
+{
+    fn prove(&self) -> Proof<'pos, T>
+    where
+        'guard: 'pos,
+    {
+        Proof {
+            address: unsafe { NonNull::new_unchecked(&raw const **self as *mut _) },
+            _phantom: PhantomData,
+        }
+    }
+}

+ 12 - 0
crates/eonix_sync/src/marker.rs

@@ -0,0 +1,12 @@
+use core::{cell::UnsafeCell, marker::PhantomData};
+
+/// A marker type that indicates that the type is not `Send`.
+pub struct NotSend(PhantomData<*const ()>);
+
+/// A marker type that indicates that the type is not `Sync`.
+#[allow(dead_code)]
+pub struct NotSync(PhantomData<UnsafeCell<()>>);
+
+// SAFETY: `NotSend` is a zero-sized marker that only suppresses `Send`; it carries no
+//         data, so sharing references to it across threads is trivially safe.
+unsafe impl Sync for NotSend {}

+ 96 - 0
crates/eonix_sync/src/mutex.rs

@@ -0,0 +1,96 @@
+mod guard;
+
+use crate::WaitList;
+use core::{
+    cell::UnsafeCell,
+    pin::pin,
+    sync::atomic::{AtomicBool, Ordering},
+};
+
+pub use guard::MutexGuard;
+
+#[derive(Debug, Default)]
+pub struct Mutex<T>
+where
+    T: ?Sized,
+{
+    locked: AtomicBool,
+    wait_list: WaitList,
+    value: UnsafeCell<T>,
+}
+
+impl<T> Mutex<T> {
+    pub const fn new(value: T) -> Self {
+        Self {
+            locked: AtomicBool::new(false),
+            wait_list: WaitList::new(),
+            value: UnsafeCell::new(value),
+        }
+    }
+}
+
+impl<T> Mutex<T>
+where
+    T: ?Sized,
+{
+    /// # Safety
+    /// This function is unsafe because the caller MUST ensure that exclusive access
+    /// has already been acquired before calling this function.
+    unsafe fn get_lock(&self) -> MutexGuard<'_, T> {
+        MutexGuard {
+            lock: self,
+            // SAFETY: We are holding the lock, so we can safely access the value.
+            value: unsafe { &mut *self.value.get() },
+        }
+    }
+
+    pub fn try_lock(&self) -> Option<MutexGuard<'_, T>> {
+        self.locked
+            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
+            .ok()
+            .map(|_| unsafe { self.get_lock() })
+    }
+
+    fn try_lock_weak(&self) -> Option<MutexGuard<'_, T>> {
+        self.locked
+            .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed)
+            .ok()
+            .map(|_| unsafe { self.get_lock() })
+    }
+
+    #[cold]
+    async fn lock_slow_path(&self) -> MutexGuard<'_, T> {
+        loop {
+            let mut wait = pin!(self.wait_list.prepare_to_wait());
+            wait.as_mut().add_to_wait_list();
+
+            if let Some(guard) = self.try_lock_weak() {
+                return guard;
+            }
+
+            wait.await;
+        }
+    }
+
+    pub async fn lock(&self) -> MutexGuard<'_, T> {
+        if let Some(guard) = self.try_lock() {
+            // Quick path
+            guard
+        } else {
+            self.lock_slow_path().await
+        }
+    }
+
+    pub fn get_mut(&mut self) -> &mut T {
+        // SAFETY: The exclusive access to the lock is guaranteed by the borrow checker.
+        unsafe { &mut *self.value.get() }
+    }
+}
+
+// SAFETY: As long as the value protected by the lock is able to be sent between threads,
+//         the lock itself can be sent between threads.
+unsafe impl<T> Send for Mutex<T> where T: ?Sized + Send {}
+
+// SAFETY: `Mutex` provides exclusive access to the value it protects, so it is safe to
+//         implement `Sync` for it as long as the protected value is `Send`.
+unsafe impl<T> Sync for Mutex<T> where T: ?Sized + Send {}
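A small sketch of how the async `Mutex` is meant to be used (the static and the function are invented for illustration and assume `eonix_sync` as a dependency):

use eonix_sync::Mutex;

static COUNTER: Mutex<u64> = Mutex::new(0);

async fn bump() -> u64 {
    // Fast path: a single compare-exchange. Slow path: the task is put on the
    // wait list and woken by `notify_one` when the current holder drops its guard.
    let mut guard = COUNTER.lock().await;
    *guard += 1;
    *guard
} // Dropping `guard` releases the mutex and wakes one waiter, if any.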

+ 98 - 0
crates/eonix_sync/src/mutex/guard.rs

@@ -0,0 +1,98 @@
+use super::Mutex;
+use crate::{UnlockableGuard, UnlockedGuard};
+use core::{
+    ops::{Deref, DerefMut},
+    sync::atomic::Ordering,
+};
+
+pub struct MutexGuard<'a, T>
+where
+    T: ?Sized,
+{
+    pub(super) lock: &'a Mutex<T>,
+    pub(super) value: &'a mut T,
+}
+
+pub struct UnlockedMutexGuard<'a, T>(&'a Mutex<T>)
+where
+    T: ?Sized;
+
+impl<T> Drop for MutexGuard<'_, T>
+where
+    T: ?Sized,
+{
+    fn drop(&mut self) {
+        let locked = self.lock.locked.swap(false, Ordering::Release);
+        debug_assert!(
+            locked,
+            "MutexGuard::drop(): unlock() called on an unlocked mutex.",
+        );
+        self.lock.wait_list.notify_one();
+    }
+}
+
+impl<T> Deref for MutexGuard<'_, T>
+where
+    T: ?Sized,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<T> DerefMut for MutexGuard<'_, T>
+where
+    T: ?Sized,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.value
+    }
+}
+
+impl<T, U> AsRef<U> for MutexGuard<'_, T>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsRef<U>,
+{
+    fn as_ref(&self) -> &U {
+        self.deref().as_ref()
+    }
+}
+
+impl<T, U> AsMut<U> for MutexGuard<'_, T>
+where
+    T: ?Sized + AsMut<U>,
+    U: ?Sized,
+    <Self as Deref>::Target: AsMut<U>,
+{
+    fn as_mut(&mut self) -> &mut U {
+        self.deref_mut().as_mut()
+    }
+}
+
+impl<'a, T> UnlockableGuard for MutexGuard<'a, T>
+where
+    T: ?Sized + Send,
+{
+    type Unlocked = UnlockedMutexGuard<'a, T>;
+
+    fn unlock(self) -> Self::Unlocked {
+        // The lock will be unlocked when the guard is dropped.
+        UnlockedMutexGuard(self.lock)
+    }
+}
+
+unsafe impl<'a, T> UnlockedGuard for UnlockedMutexGuard<'a, T>
+where
+    T: ?Sized + Send,
+{
+    type Guard = MutexGuard<'a, T>;
+
+    async fn relock(self) -> Self::Guard {
+        let Self(lock) = self;
+        lock.lock().await
+    }
+}
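To show what the `UnlockableGuard`/`UnlockedGuard` pair buys us, here is a hedged sketch of releasing a mutex across an await point and reacquiring it afterwards; the `do_slow_work` helper and `update_in_two_steps` function are hypothetical:

use eonix_sync::{Mutex, UnlockableGuard, UnlockedGuard};

async fn do_slow_work() { /* hypothetical: e.g. wait for I/O */ }

async fn update_in_two_steps(m: &Mutex<u32>) {
    let mut guard = m.lock().await;
    *guard += 1;

    let unlocked = guard.unlock(); // the mutex is released here...
    do_slow_work().await;          // ...so other tasks can take it in the meantime

    let mut guard = unlocked.relock().await;
    *guard += 1;
}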

+ 194 - 0
crates/eonix_sync/src/rwlock.rs

@@ -0,0 +1,194 @@
+mod guard;
+
+use crate::WaitList;
+use core::{
+    cell::UnsafeCell,
+    pin::pin,
+    sync::atomic::{AtomicIsize, Ordering},
+};
+
+pub use guard::{RwLockReadGuard, RwLockWriteGuard};
+
+#[derive(Debug, Default)]
+pub struct RwLock<T>
+where
+    T: ?Sized,
+{
+    counter: AtomicIsize,
+    read_wait: WaitList,
+    write_wait: WaitList,
+    value: UnsafeCell<T>,
+}
+
+impl<T> RwLock<T> {
+    pub const fn new(value: T) -> Self {
+        Self {
+            counter: AtomicIsize::new(0),
+            read_wait: WaitList::new(),
+            write_wait: WaitList::new(),
+            value: UnsafeCell::new(value),
+        }
+    }
+}
+
+impl<T> RwLock<T>
+where
+    T: ?Sized,
+{
+    /// # Safety
+    /// This function is unsafe because the caller MUST ensure that write access
+    /// has already been acquired before calling this function.
+    unsafe fn write_lock(&self) -> RwLockWriteGuard<'_, T> {
+        RwLockWriteGuard {
+            lock: self,
+            // SAFETY: We are holding the write lock, so we can safely access the value.
+            value: unsafe { &mut *self.value.get() },
+        }
+    }
+
+    /// # Safety
+    /// This function is unsafe because the caller MUST ensure that read access
+    /// has already been acquired before calling this function.
+    unsafe fn read_lock(&self) -> RwLockReadGuard<'_, T> {
+        RwLockReadGuard {
+            lock: self,
+            // SAFETY: We are holding the read lock, so we can safely access the value.
+            value: unsafe { &*self.value.get() },
+        }
+    }
+
+    /// # Safety
+    /// This function is unsafe because the caller MUST ensure that we won't hold any
+    /// references to the value after calling this function.
+    pub(self) unsafe fn write_unlock(&self) {
+        let old = self.counter.swap(0, Ordering::Release);
+        debug_assert_eq!(
+            old, -1,
+            "RwLock::write_unlock(): erroneous counter value: {}",
+            old
+        );
+        if !self.write_wait.notify_one() {
+            self.read_wait.notify_all();
+        }
+    }
+
+    /// # Safety
+    /// This function is unsafe because the caller MUST ensure that we won't hold any
+    /// references to the value after calling this function.
+    pub(self) unsafe fn read_unlock(&self) {
+        match self.counter.fetch_sub(1, Ordering::Release) {
+            2.. => {}
+            1 => {
+                if !self.write_wait.notify_one() {
+                    self.read_wait.notify_all();
+                }
+            }
+            val => unreachable!("RwLock::read_unlock(): erroneous counter value: {}", val),
+        }
+    }
+
+    pub fn try_write(&self) -> Option<RwLockWriteGuard<'_, T>> {
+        self.counter
+            .compare_exchange(0, -1, Ordering::Acquire, Ordering::Relaxed)
+            .ok()
+            .map(|_| unsafe { self.write_lock() })
+    }
+
+    fn try_write_weak(&self) -> Option<RwLockWriteGuard<'_, T>> {
+        self.counter
+            .compare_exchange_weak(0, -1, Ordering::Acquire, Ordering::Relaxed)
+            .ok()
+            .map(|_| unsafe { self.write_lock() })
+    }
+
+    pub fn try_read(&self) -> Option<RwLockReadGuard<'_, T>> {
+        // We'll spin if we fail here anyway.
+        if self.write_wait.has_waiters() {
+            return None;
+        }
+
+        let counter = self.counter.load(Ordering::Relaxed);
+        if counter >= 0 {
+            self.counter
+                .compare_exchange(counter, counter + 1, Ordering::Acquire, Ordering::Relaxed)
+                .ok()
+                .map(|_| unsafe { self.read_lock() })
+        } else {
+            None
+        }
+    }
+
+    fn try_read_weak(&self) -> Option<RwLockReadGuard<'_, T>> {
+        // TODO: If we check write waiters here, we would lose wakeups.
+        //       Try locking the wait lists to prevent this.
+
+        let counter = self.counter.load(Ordering::Relaxed);
+        if counter >= 0 {
+            self.counter
+                .compare_exchange_weak(counter, counter + 1, Ordering::Acquire, Ordering::Relaxed)
+                .ok()
+                .map(|_| unsafe { self.read_lock() })
+        } else {
+            None
+        }
+    }
+
+    #[cold]
+    async fn write_slow_path(&self) -> RwLockWriteGuard<'_, T> {
+        loop {
+            let mut wait = pin!(self.write_wait.prepare_to_wait());
+            wait.as_mut().add_to_wait_list();
+
+            if let Some(guard) = self.try_write_weak() {
+                return guard;
+            }
+
+            wait.await;
+        }
+    }
+
+    #[cold]
+    async fn read_slow_path(&self) -> RwLockReadGuard<'_, T> {
+        loop {
+            let mut wait = pin!(self.read_wait.prepare_to_wait());
+            wait.as_mut().add_to_wait_list();
+
+            if let Some(guard) = self.try_read_weak() {
+                return guard;
+            }
+
+            wait.await;
+        }
+    }
+
+    pub async fn write(&self) -> RwLockWriteGuard<'_, T> {
+        if let Some(guard) = self.try_write() {
+            // Quick path
+            guard
+        } else {
+            self.write_slow_path().await
+        }
+    }
+
+    pub async fn read(&self) -> RwLockReadGuard<'_, T> {
+        if let Some(guard) = self.try_read() {
+            // Quick path
+            guard
+        } else {
+            self.read_slow_path().await
+        }
+    }
+
+    pub fn get_mut(&mut self) -> &mut T {
+        // SAFETY: The exclusive access to the lock is guaranteed by the borrow checker.
+        unsafe { &mut *self.value.get() }
+    }
+}
+
+// SAFETY: As long as the value protected by the lock is able to be sent between threads,
+//         the lock itself can be sent between threads.
+unsafe impl<T> Send for RwLock<T> where T: ?Sized + Send {}
+
+// SAFETY: `RwLock` can provide shared access to the value it protects, so it is safe to
+//         implement `Sync` for it. However, this is only true if the value itself is `Sync`.
+unsafe impl<T> Sync for RwLock<T> where T: ?Sized + Send + Sync {}
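A brief usage sketch for the async `RwLock` (the static name and both functions are illustrative only):

use eonix_sync::RwLock;

static ROUTES: RwLock<[u8; 4]> = RwLock::new([0; 4]);

async fn lookup(i: usize) -> u8 {
    // Readers share the lock; each reader just bumps the counter by one.
    ROUTES.read().await[i]
}

async fn replace(i: usize, value: u8) {
    // A writer parks on `write_wait` until the counter can swing from 0 to -1.
    ROUTES.write().await[i] = value;
}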

+ 192 - 0
crates/eonix_sync/src/rwlock/guard.rs

@@ -0,0 +1,192 @@
+use super::RwLock;
+use crate::{AsProof, AsProofMut, Proof, ProofMut, UnlockableGuard, UnlockedGuard};
+use core::ops::{Deref, DerefMut};
+
+pub struct RwLockWriteGuard<'a, T>
+where
+    T: ?Sized,
+{
+    pub(super) lock: &'a RwLock<T>,
+    pub(super) value: &'a mut T,
+}
+
+pub struct RwLockReadGuard<'a, T>
+where
+    T: ?Sized,
+{
+    pub(super) lock: &'a RwLock<T>,
+    pub(super) value: &'a T,
+}
+
+pub struct UnlockedRwLockReadGuard<'a, T>(&'a RwLock<T>)
+where
+    T: ?Sized;
+
+pub struct UnlockedRwLockWriteGuard<'a, T>(&'a RwLock<T>)
+where
+    T: ?Sized;
+
+impl<T> Drop for RwLockWriteGuard<'_, T>
+where
+    T: ?Sized,
+{
+    fn drop(&mut self) {
+        unsafe {
+            // SAFETY: We are dropping the guard.
+            self.lock.write_unlock();
+        }
+    }
+}
+
+impl<T> Drop for RwLockReadGuard<'_, T>
+where
+    T: ?Sized,
+{
+    fn drop(&mut self) {
+        unsafe {
+            // SAFETY: We are dropping the guard.
+            self.lock.read_unlock();
+        }
+    }
+}
+
+impl<T> Deref for RwLockWriteGuard<'_, T>
+where
+    T: ?Sized,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<T> DerefMut for RwLockWriteGuard<'_, T>
+where
+    T: ?Sized,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.value
+    }
+}
+
+impl<T, U> AsRef<U> for RwLockWriteGuard<'_, T>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsRef<U>,
+{
+    fn as_ref(&self) -> &U {
+        self.deref().as_ref()
+    }
+}
+
+impl<T, U> AsMut<U> for RwLockWriteGuard<'_, T>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsMut<U>,
+{
+    fn as_mut(&mut self) -> &mut U {
+        self.deref_mut().as_mut()
+    }
+}
+
+impl<T> Deref for RwLockReadGuard<'_, T>
+where
+    T: ?Sized,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<T, U> AsRef<U> for RwLockReadGuard<'_, T>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsRef<U>,
+{
+    fn as_ref(&self) -> &U {
+        self.deref().as_ref()
+    }
+}
+
+unsafe impl<'guard, 'pos, T> AsProof<'guard, 'pos, T> for RwLockWriteGuard<'guard, T>
+where
+    T: ?Sized,
+{
+    fn prove(&self) -> Proof<'pos, T> {
+        unsafe { Proof::new(&raw const *self.value) }
+    }
+}
+
+unsafe impl<'guard, 'pos, T> AsProofMut<'guard, 'pos, T> for RwLockWriteGuard<'guard, T>
+where
+    T: ?Sized,
+{
+    fn prove_mut(&self) -> ProofMut<'pos, T> {
+        unsafe { ProofMut::new(&raw const *self.value as *mut _) }
+    }
+}
+
+unsafe impl<'guard, 'pos, T> AsProof<'guard, 'pos, T> for RwLockReadGuard<'guard, T>
+where
+    T: ?Sized,
+{
+    fn prove(&self) -> Proof<'pos, T> {
+        unsafe { Proof::new(&raw const *self.value) }
+    }
+}
+
+impl<'a, T> UnlockableGuard for RwLockReadGuard<'a, T>
+where
+    T: ?Sized + Send + Sync,
+{
+    type Unlocked = UnlockedRwLockReadGuard<'a, T>;
+
+    fn unlock(self) -> Self::Unlocked {
+        // The lock will be unlocked when the guard is dropped.
+        UnlockedRwLockReadGuard(self.lock)
+    }
+}
+
+// SAFETY: `UnlockedRwLockReadGuard` is stateless.
+unsafe impl<'a, T> UnlockedGuard for UnlockedRwLockReadGuard<'a, T>
+where
+    T: ?Sized + Send + Sync,
+{
+    type Guard = RwLockReadGuard<'a, T>;
+
+    async fn relock(self) -> Self::Guard {
+        let Self(lock) = self;
+        lock.read().await
+    }
+}
+
+impl<'a, T> UnlockableGuard for RwLockWriteGuard<'a, T>
+where
+    T: ?Sized + Send + Sync,
+{
+    type Unlocked = UnlockedRwLockWriteGuard<'a, T>;
+
+    fn unlock(self) -> Self::Unlocked {
+        // The lock will be unlocked when the guard is dropped.
+        UnlockedRwLockWriteGuard(self.lock)
+    }
+}
+
+// SAFETY: `UnlockedRwLockWriteGuard` is stateless.
+unsafe impl<'a, T> UnlockedGuard for UnlockedRwLockWriteGuard<'a, T>
+where
+    T: ?Sized + Send + Sync,
+{
+    type Guard = RwLockWriteGuard<'a, T>;
+
+    async fn relock(self) -> Self::Guard {
+        let Self(lock) = self;
+        lock.write().await
+    }
+}

+ 89 - 95
crates/eonix_sync/src/spin.rs

@@ -1,122 +1,116 @@
-use super::strategy::LockStrategy;
+mod guard;
+mod relax;
+mod spin_irq;
+
 use core::{
-    arch::asm,
+    cell::UnsafeCell,
     marker::PhantomData,
     sync::atomic::{AtomicBool, Ordering},
 };
-
-pub struct SpinStrategy;
-pub struct IrqStrategy<Strategy: LockStrategy>(PhantomData<Strategy>);
-
-impl SpinStrategy {
-    fn is_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {
-        data.load(Ordering::Relaxed)
-    }
+use spin_irq::IrqStateGuard;
+
+pub use guard::{SpinGuard, UnlockedSpinGuard};
+pub use relax::{LoopRelax, Relax, SpinRelax};
+pub use spin_irq::{SpinIrqGuard, UnlockedSpinIrqGuard};
+
+/// A spinlock is a lock that uses busy-waiting to acquire the lock.
+/// It is useful for short critical sections where the overhead of a context switch
+/// is too high.
+#[derive(Debug, Default)]
+pub struct Spin<T, R = SpinRelax>
+where
+    T: ?Sized,
+{
+    _phantom: PhantomData<R>,
+    locked: AtomicBool,
+    value: UnsafeCell<T>,
 }
 
-unsafe impl LockStrategy for SpinStrategy {
-    type StrategyData = AtomicBool;
-    type GuardContext = ();
-
-    fn new_data() -> Self::StrategyData {
-        AtomicBool::new(false)
-    }
-
-    unsafe fn is_locked(data: &Self::StrategyData) -> bool {
-        data.load(Ordering::Relaxed)
-    }
-
-    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext> {
-        use Ordering::{Acquire, Relaxed};
-        eonix_preempt::disable();
-
-        if data.compare_exchange(false, true, Acquire, Relaxed).is_ok() {
-            Some(())
-        } else {
-            None
-        }
-    }
-
-    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
-        use Ordering::{Acquire, Relaxed};
-        eonix_preempt::disable();
-
-        while data
-            .compare_exchange_weak(false, true, Acquire, Relaxed)
-            .is_err()
-        {
-            while Self::is_locked(data) {
-                core::hint::spin_loop();
-            }
+impl<T, R> Spin<T, R>
+where
+    R: Relax,
+{
+    pub const fn new(value: T) -> Self {
+        Self {
+            locked: AtomicBool::new(false),
+            value: UnsafeCell::new(value),
+            _phantom: PhantomData,
         }
     }
+}
 
-    unsafe fn do_unlock(data: &Self::StrategyData, _: &mut Self::GuardContext) {
-        data.store(false, Ordering::Release);
+impl<T, R> Spin<T, R>
+where
+    T: ?Sized,
+{
+    /// # Safety
+    /// This function is unsafe because the caller MUST ensure that the protected
+    /// value is no longer accessed after calling this function.
+    unsafe fn do_unlock(&self) {
+        let locked = self.locked.swap(false, Ordering::Release);
+        debug_assert!(locked, "Spin::unlock(): Unlocking an unlocked lock");
         eonix_preempt::enable();
     }
 }
 
-unsafe impl<Strategy: LockStrategy> LockStrategy for IrqStrategy<Strategy> {
-    type StrategyData = Strategy::StrategyData;
-    type GuardContext = (Strategy::GuardContext, usize);
-
-    fn new_data() -> Self::StrategyData {
-        Strategy::new_data()
-    }
-
-    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
-        let mut context: usize;
-
-        unsafe {
-            asm!(
-                "pushf",
-                "pop {context}",
-                "cli",
-                context = out(reg) context,
-            );
+impl<T, R> Spin<T, R>
+where
+    T: ?Sized,
+    R: Relax,
+{
+    pub fn lock(&self) -> SpinGuard<'_, T, R> {
+        self.do_lock();
+
+        SpinGuard {
+            lock: self,
+            // SAFETY: We are holding the lock, so we can safely access the value.
+            value: unsafe { &mut *self.value.get() },
+            _not_send: PhantomData,
         }
-
-        unsafe { (Strategy::do_lock(data), context) }
     }
 
-    unsafe fn do_unlock(data: &Self::StrategyData, context: &mut Self::GuardContext) {
-        unsafe {
-            Strategy::do_unlock(data, &mut context.0);
+    pub fn lock_irq(&self) -> SpinIrqGuard<'_, T, R> {
+        let irq_state = arch::disable_irqs_save();
+        let guard = self.lock();
 
-            asm!(
-                "push {context}",
-                "popf",
-                context = in(reg) context.1,
-                options(nomem),
-            )
+        SpinIrqGuard {
+            guard,
+            irq_state: IrqStateGuard::new(irq_state),
+            _not_send: PhantomData,
         }
     }
 
-    unsafe fn do_temporary_unlock(data: &Self::StrategyData, context: &mut Self::GuardContext) {
-        unsafe { Strategy::do_unlock(data, &mut context.0) }
-    }
-
-    unsafe fn do_relock(data: &Self::StrategyData, context: &mut Self::GuardContext) {
-        unsafe { Strategy::do_relock(data, &mut context.0) }
+    pub fn get_mut(&mut self) -> &mut T {
+        // SAFETY: The exclusive access to the lock is guaranteed by the borrow checker.
+        unsafe { &mut *self.value.get() }
     }
 
-    unsafe fn is_locked(data: &Self::StrategyData) -> bool {
-        unsafe { Strategy::is_locked(data) }
-    }
+    fn do_lock(&self) {
+        eonix_preempt::disable();
 
-    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext> {
-        let mut irq_context: usize;
-        unsafe {
-            asm!(
-                "pushf",
-                "pop {context}",
-                "cli",
-                context = out(reg) irq_context,
-            );
+        while let Err(_) =
+            self.locked
+                .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed)
+        {
+            R::relax();
         }
+    }
+}
 
-        let lock_context = unsafe { Strategy::try_lock(data) };
-        lock_context.map(|lock_context| (lock_context, irq_context))
+impl<T, R> Clone for Spin<T, R>
+where
+    T: ?Sized + Clone,
+    R: Relax,
+{
+    fn clone(&self) -> Self {
+        Self::new(self.lock().clone())
     }
 }
+
+// SAFETY: As long as the value protected by the lock is able to be sent between threads,
+//         the lock itself can be sent between threads.
+unsafe impl<T, R> Send for Spin<T, R> where T: ?Sized + Send {}
+
+// SAFETY: The lock only hands out exclusive access to the protected value, so it is safe
+//         to share the lock between threads as long as the value is `Send`.
+unsafe impl<T, R> Sync for Spin<T, R> where T: ?Sized + Send {}
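For orientation, a minimal sketch of the reworked `Spin` API (the static and both functions are invented; `lock_irq` relies on the `arch` crate as in this tree):

use eonix_sync::Spin;

static TICKS: Spin<u64> = Spin::new(0);

fn on_schedule() {
    // Disables preemption, then busy-waits on the flag using `SpinRelax`.
    *TICKS.lock() += 1;
}

fn on_timer_interrupt() {
    // Data shared with IRQ context must use `lock_irq`, which additionally saves
    // and disables local interrupts for the lifetime of the guard.
    *TICKS.lock_irq() += 1;
}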

+ 114 - 0
crates/eonix_sync/src/spin/guard.rs

@@ -0,0 +1,114 @@
+use super::{Relax, Spin, SpinRelax};
+use crate::{marker::NotSend, UnlockableGuard, UnlockedGuard};
+use core::{
+    marker::PhantomData,
+    mem::ManuallyDrop,
+    ops::{Deref, DerefMut},
+};
+
+pub struct SpinGuard<'a, T, R = SpinRelax>
+where
+    T: ?Sized,
+{
+    pub(super) lock: &'a Spin<T, R>,
+    pub(super) value: &'a mut T,
+    /// We don't want this to be `Send`: the guard must not be transferred to another
+    /// thread, since we have disabled preemption on the local cpu.
+    pub(super) _not_send: PhantomData<NotSend>,
+}
+
+pub struct UnlockedSpinGuard<'a, T, R>(&'a Spin<T, R>)
+where
+    T: ?Sized;
+
+// SAFETY: As long as the value protected by the lock is able to be shared between threads,
+//         we can access the guard from multiple threads.
+unsafe impl<T, R> Sync for SpinGuard<'_, T, R> where T: ?Sized + Sync {}
+
+impl<T, R> Drop for SpinGuard<'_, T, R>
+where
+    T: ?Sized,
+{
+    fn drop(&mut self) {
+        unsafe {
+            // SAFETY: We are dropping the guard, so we are not holding the lock anymore.
+            self.lock.do_unlock();
+        }
+    }
+}
+
+impl<T, R> Deref for SpinGuard<'_, T, R>
+where
+    T: ?Sized,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: We are holding the lock, so we can safely access the value.
+        self.value
+    }
+}
+
+impl<T, R> DerefMut for SpinGuard<'_, T, R>
+where
+    T: ?Sized,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        // SAFETY: We are holding the lock, so we can safely access the value.
+        self.value
+    }
+}
+
+impl<T, U, R> AsRef<U> for SpinGuard<'_, T, R>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsRef<U>,
+{
+    fn as_ref(&self) -> &U {
+        self.deref().as_ref()
+    }
+}
+
+impl<T, U, R> AsMut<U> for SpinGuard<'_, T, R>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsMut<U>,
+{
+    fn as_mut(&mut self) -> &mut U {
+        self.deref_mut().as_mut()
+    }
+}
+
+impl<'a, T, R> UnlockableGuard for SpinGuard<'a, T, R>
+where
+    T: ?Sized + Send,
+    R: Relax,
+{
+    type Unlocked = UnlockedSpinGuard<'a, T, R>;
+
+    fn unlock(self) -> Self::Unlocked {
+        let me = ManuallyDrop::new(self);
+        unsafe {
+            // SAFETY: No access is possible after unlocking.
+            me.lock.do_unlock();
+        }
+
+        UnlockedSpinGuard(me.lock)
+    }
+}
+
+// SAFETY: The unlocked guard only holds a reference to the lock, so no further processing is needed.
+unsafe impl<'a, T, R> UnlockedGuard for UnlockedSpinGuard<'a, T, R>
+where
+    T: ?Sized + Send,
+    R: Relax,
+{
+    type Guard = SpinGuard<'a, T, R>;
+
+    async fn relock(self) -> Self::Guard {
+        let Self(lock) = self;
+        lock.lock()
+    }
+}

+ 17 - 0
crates/eonix_sync/src/spin/relax.rs

@@ -0,0 +1,17 @@
+pub trait Relax {
+    fn relax();
+}
+
+#[derive(Default, Debug, Clone, Copy)]
+pub struct LoopRelax;
+impl Relax for LoopRelax {
+    fn relax() {}
+}
+
+#[derive(Default, Debug, Clone, Copy)]
+pub struct SpinRelax;
+impl Relax for SpinRelax {
+    fn relax() {
+        core::hint::spin_loop();
+    }
+}
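The `Relax` trait is the hook for custom back-off policies; a hypothetical sketch of plugging one into `Spin` (the `BackoffRelax` type is invented):

use eonix_sync::{Relax, Spin};

// Hypothetical policy: it could yield to the scheduler or pause longer; here it
// simply reuses the CPU spin hint.
struct BackoffRelax;

impl Relax for BackoffRelax {
    fn relax() {
        core::hint::spin_loop();
    }
}

type BackoffSpin<T> = Spin<T, BackoffRelax>;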

+ 124 - 0
crates/eonix_sync/src/spin/spin_irq.rs

@@ -0,0 +1,124 @@
+use super::{Relax, SpinGuard, SpinRelax, UnlockedSpinGuard};
+use crate::{marker::NotSend, UnlockableGuard, UnlockedGuard};
+use core::{
+    marker::PhantomData,
+    mem::ManuallyDrop,
+    ops::{Deref, DerefMut},
+};
+
+pub(super) struct IrqStateGuard(ManuallyDrop<arch::IrqState>);
+
+pub struct SpinIrqGuard<'a, T, R = SpinRelax>
+where
+    T: ?Sized,
+{
+    pub(super) guard: SpinGuard<'a, T, R>,
+    pub(super) irq_state: IrqStateGuard,
+    /// We don't want this to be `Send`: the guard must not be transferred to another
+    /// thread, since we have disabled preemption and saved the IRQ state on the
+    /// local cpu.
+    pub(super) _not_send: PhantomData<NotSend>,
+}
+
+pub struct UnlockedSpinIrqGuard<'a, T, R>
+where
+    T: ?Sized,
+{
+    unlocked_guard: UnlockedSpinGuard<'a, T, R>,
+    irq_state: IrqStateGuard,
+}
+
+// SAFETY: As long as the value protected by the lock is able to be shared between threads,
+//         we can access the guard from multiple threads.
+unsafe impl<T, R> Sync for SpinIrqGuard<'_, T, R> where T: ?Sized + Sync {}
+
+impl IrqStateGuard {
+    pub const fn new(irq_state: arch::IrqState) -> Self {
+        Self(ManuallyDrop::new(irq_state))
+    }
+}
+
+impl Drop for IrqStateGuard {
+    fn drop(&mut self) {
+        let Self(irq_state) = self;
+
+        unsafe {
+            // SAFETY: We are dropping the guard, so we are never going to access the value.
+            ManuallyDrop::take(irq_state).restore();
+        }
+    }
+}
+
+impl<T, R> Deref for SpinIrqGuard<'_, T, R>
+where
+    T: ?Sized,
+{
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.guard.deref()
+    }
+}
+
+impl<T, R> DerefMut for SpinIrqGuard<'_, T, R>
+where
+    T: ?Sized,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.guard.deref_mut()
+    }
+}
+
+impl<T, U, R> AsRef<U> for SpinIrqGuard<'_, T, R>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsRef<U>,
+{
+    fn as_ref(&self) -> &U {
+        self.deref().as_ref()
+    }
+}
+
+impl<T, U, R> AsMut<U> for SpinIrqGuard<'_, T, R>
+where
+    T: ?Sized,
+    U: ?Sized,
+    <Self as Deref>::Target: AsMut<U>,
+{
+    fn as_mut(&mut self) -> &mut U {
+        self.deref_mut().as_mut()
+    }
+}
+
+impl<'a, T, R> UnlockableGuard for SpinIrqGuard<'a, T, R>
+where
+    T: ?Sized + Send,
+    R: Relax,
+{
+    type Unlocked = UnlockedSpinIrqGuard<'a, T, R>;
+
+    fn unlock(self) -> Self::Unlocked {
+        UnlockedSpinIrqGuard {
+            unlocked_guard: self.guard.unlock(),
+            irq_state: self.irq_state,
+        }
+    }
+}
+
+// SAFETY: The unlocked guard only holds the lock reference and the saved IRQ state, so
+//         no further processing is needed.
+unsafe impl<'a, T, R> UnlockedGuard for UnlockedSpinIrqGuard<'a, T, R>
+where
+    T: ?Sized + Send,
+    R: Relax,
+{
+    type Guard = SpinIrqGuard<'a, T, R>;
+
+    async fn relock(self) -> Self::Guard {
+        SpinIrqGuard {
+            guard: self.unlocked_guard.relock().await,
+            irq_state: self.irq_state,
+            _not_send: PhantomData,
+        }
+    }
+}

+ 0 - 45
crates/eonix_sync/src/strategy.rs

@@ -1,45 +0,0 @@
-pub unsafe trait LockStrategy {
-    type StrategyData;
-    type GuardContext;
-
-    fn new_data() -> Self::StrategyData
-    where
-        Self: Sized;
-
-    unsafe fn is_locked(data: &Self::StrategyData) -> bool
-    where
-        Self: Sized;
-
-    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext>
-    where
-        Self: Sized;
-
-    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext
-    where
-        Self: Sized;
-
-    unsafe fn do_unlock(data: &Self::StrategyData, context: &mut Self::GuardContext)
-    where
-        Self: Sized;
-
-    unsafe fn do_lock_shared(data: &Self::StrategyData) -> Self::GuardContext
-    where
-        Self: Sized,
-    {
-        unsafe { Self::do_lock(data) }
-    }
-
-    unsafe fn do_temporary_unlock(data: &Self::StrategyData, context: &mut Self::GuardContext)
-    where
-        Self: Sized,
-    {
-        unsafe { Self::do_unlock(data, context) }
-    }
-
-    unsafe fn do_relock(data: &Self::StrategyData, context: &mut Self::GuardContext)
-    where
-        Self: Sized,
-    {
-        *context = unsafe { Self::do_lock(data) };
-    }
-}

+ 122 - 0
crates/eonix_sync/src/wait_list.rs

@@ -0,0 +1,122 @@
+mod wait_handle;
+mod wait_object;
+
+use crate::{LazyLock, Spin};
+use core::fmt;
+use intrusive_collections::{linked_list::CursorMut, LinkedList};
+use wait_object::{WaitObject, WaitObjectAdapter};
+
+pub use wait_handle::WaitHandle;
+
+pub struct WaitList {
+    /// # Lock
+    /// `WaitList`s might be used in IRQ handlers, so `lock_irq` should
+    /// be used on `waiters`.
+    waiters: LazyLock<Spin<LinkedList<WaitObjectAdapter>>>,
+}
+
+impl WaitList {
+    pub const fn new() -> Self {
+        Self {
+            waiters: LazyLock::new(|| Spin::new(LinkedList::new(WaitObjectAdapter::new()))),
+        }
+    }
+
+    pub fn has_waiters(&self) -> bool {
+        !self.waiters.lock_irq().is_empty()
+    }
+
+    pub fn notify_one(&self) -> bool {
+        let mut waiters = self.waiters.lock_irq();
+        let mut waiter = waiters.front_mut();
+
+        if !waiter.is_null() {
+            unsafe {
+                // SAFETY: `waiter` is not null.
+                self.notify_waiter_unchecked(&mut waiter);
+            }
+
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn notify_all(&self) -> usize {
+        let mut waiters = self.waiters.lock_irq();
+        let mut waiter = waiters.front_mut();
+        let mut count = 0;
+
+        while !waiter.is_null() {
+            unsafe {
+                // SAFETY: `waiter` is not null.
+                self.notify_waiter_unchecked(&mut waiter);
+            }
+            count += 1;
+        }
+
+        count
+    }
+
+    pub fn prepare_to_wait(&self) -> WaitHandle<'_> {
+        WaitHandle::new(self)
+    }
+}
+
+impl WaitList {
+    unsafe fn notify_waiter_unchecked(&self, waiter: &mut CursorMut<'_, WaitObjectAdapter>) {
+        let wait_object = unsafe {
+            // SAFETY: The caller guarantees that `waiter` should be `Some`.
+            //         `wait_object` is a valid reference to a `WaitObject` because the
+            //         wait object is not dropped until the waiting thread has been woken
+            //         up and has made sure that it is no longer on the list.
+            waiter.get().unwrap_unchecked()
+        };
+
+        wait_object.set_woken_up();
+
+        if let Some(waker) = wait_object.take_waker() {
+            waker.wake();
+        }
+
+        // Acknowledge the wait object that we're done.
+        unsafe {
+            waiter.remove().unwrap_unchecked().clear_wait_list();
+        }
+    }
+
+    pub(self) fn notify_waiter(&self, wait_object: &WaitObject) {
+        let mut waiters = self.waiters.lock_irq();
+        if !wait_object.on_list() {
+            return;
+        }
+
+        assert_eq!(
+            wait_object.wait_list(),
+            self,
+            "Wait object is not in the wait list."
+        );
+
+        let mut waiter = unsafe {
+            // SAFETY: `wait_object` is on the `waiters` list.
+            waiters.cursor_mut_from_ptr(wait_object)
+        };
+
+        unsafe {
+            // SAFETY: We got the cursor from a valid wait object, which can't be null.
+            self.notify_waiter_unchecked(&mut waiter);
+        }
+    }
+}
+
+impl Default for WaitList {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl fmt::Debug for WaitList {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("WaitList").finish()
+    }
+}
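The intended pattern is the same one the new `Mutex` and `RwLock` slow paths use: register on the list first, re-check the condition, then await. A hypothetical one-shot event built on `WaitList` (not part of the diff) could look like this:

use core::pin::pin;
use core::sync::atomic::{AtomicBool, Ordering};
use eonix_sync::WaitList;

// Hypothetical one-shot event built on top of `WaitList`.
struct Event {
    set: AtomicBool,
    waiters: WaitList,
}

impl Event {
    const fn new() -> Self {
        Self {
            set: AtomicBool::new(false),
            waiters: WaitList::new(),
        }
    }

    fn signal(&self) {
        self.set.store(true, Ordering::Release);
        self.waiters.notify_all();
    }

    async fn wait(&self) {
        loop {
            // Register first, then re-check, so a `signal` racing with us
            // cannot be missed between the check and the await.
            let mut wait = pin!(self.waiters.prepare_to_wait());
            wait.as_mut().add_to_wait_list();

            if self.set.load(Ordering::Acquire) {
                return;
            }

            wait.await;
        }
    }
}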

+ 221 - 0
crates/eonix_sync/src/wait_list/wait_handle.rs

@@ -0,0 +1,221 @@
+use super::{wait_object::WaitObject, WaitList};
+use core::{
+    cell::UnsafeCell,
+    hint::spin_loop,
+    pin::Pin,
+    task::{Context, Poll, Waker},
+};
+use intrusive_collections::UnsafeRef;
+
+pub struct WaitHandle<'a> {
+    wait_list: &'a WaitList,
+    wait_object: UnsafeCell<WaitObject>,
+    state: State,
+}
+
+#[derive(Debug, PartialEq)]
+enum State {
+    Init,
+    OnList,
+    WakerSet,
+    WokenUp,
+}
+
+struct PrepareSplit<'a> {
+    wait_list: &'a WaitList,
+    state: &'a mut State,
+    wait_object: Pin<&'a WaitObject>,
+}
+
+// SAFETY: All access to `wait_object` is protected.
+unsafe impl Sync for WaitHandle<'_> {}
+
+impl<'a> WaitHandle<'a> {
+    pub const fn new(wait_list: &'a WaitList) -> Self {
+        Self {
+            wait_list,
+            wait_object: UnsafeCell::new(WaitObject::new(wait_list)),
+            state: State::Init,
+        }
+    }
+
+    fn wait_object(&self) -> &WaitObject {
+        // SAFETY: We never get mutable references to a `WaitObject`.
+        unsafe { &*self.wait_object.get() }
+    }
+
+    fn split_borrow(self: Pin<&mut Self>) -> PrepareSplit<'_> {
+        unsafe {
+            // SAFETY: `wait_list` and `state` are `Unpin`.
+            let this = self.get_unchecked_mut();
+
+            // SAFETY: `wait_object` is a field of a pinned struct.
+            //         And we never get mutable references to a `WaitObject`.
+            let wait_object = Pin::new_unchecked(&*this.wait_object.get());
+
+            PrepareSplit {
+                wait_list: this.wait_list,
+                state: &mut this.state,
+                wait_object,
+            }
+        }
+    }
+
+    fn set_state(self: Pin<&mut Self>, state: State) {
+        unsafe {
+            // SAFETY: We only touch `state`, which is `Unpin`.
+            let this = self.get_unchecked_mut();
+            this.state = state;
+        }
+    }
+
+    fn wait_until_off_list(&self) {
+        while self.wait_object().on_list() {
+            spin_loop();
+        }
+    }
+
+    /// # Returns
+    /// Whether we've been woken up or not.
+    fn do_add_to_wait_list(mut self: Pin<&mut Self>, waker: Option<&Waker>) -> bool {
+        let PrepareSplit {
+            wait_list,
+            state,
+            wait_object,
+        } = self.as_mut().split_borrow();
+
+        let wait_object_ref = unsafe {
+            // SAFETY: `wait_object` is a valid reference to a `WaitObject` because the
+            //         wait object is not dropped until the waiting thread has been woken
+            //         up and has made sure that it is no longer on the list.
+            //
+            // SAFETY: `wait_object` is a pinned reference to a `WaitObject`, so we can
+            //         safely convert it to a `Pin<UnsafeRef<WaitObject>>`.
+            Pin::new_unchecked(UnsafeRef::from_raw(&raw const *wait_object))
+        };
+
+        match *state {
+            State::Init => {
+                let mut waiters = wait_list.waiters.lock_irq();
+                waiters.push_back(wait_object_ref);
+
+                if let Some(waker) = waker.cloned() {
+                    wait_object.save_waker(waker);
+                    *state = State::WakerSet;
+                } else {
+                    *state = State::OnList;
+                }
+
+                return false;
+            }
+            // We are already on the wait list, so we can just set the waker.
+            State::OnList => {
+                // If we are already woken up, we can just return.
+                if wait_object.woken_up() {
+                    *state = State::WokenUp;
+                    return true;
+                }
+
+                if let Some(waker) = waker {
+                    // Lock the waker and check if it is already set.
+                    let waker_set = wait_object.save_waker_if_not_woken_up(&waker);
+
+                    if waker_set {
+                        *state = State::WakerSet;
+                    } else {
+                        // We are already woken up, so we can just return.
+                        *state = State::WokenUp;
+                        return true;
+                    }
+                }
+
+                return false;
+            }
+            _ => unreachable!("Invalid state."),
+        }
+    }
+
+    pub fn add_to_wait_list(self: Pin<&mut Self>) {
+        self.do_add_to_wait_list(None);
+    }
+
+    /// # Safety
+    /// The caller MUST guarantee that the last use of the returned function
+    /// is before `self` is dropped. Otherwise the references captured by the
+    /// function will dangle and cause undefined behavior.
+    pub unsafe fn get_waker_function(self: Pin<&Self>) -> impl Fn() + Send + Sync + 'static {
+        let wait_list: &WaitList = unsafe {
+            // SAFETY: The caller guarantees that the last use of returned function
+            //         is before `self` is dropped.
+            &*(self.wait_list as *const _)
+        };
+
+        let wait_object = unsafe {
+            // SAFETY: The caller guarantees that the last use of returned function
+            //         is before `self` is dropped.
+            &*self.wait_object.get()
+        };
+
+        move || {
+            wait_list.notify_waiter(wait_object);
+        }
+    }
+}
+
+impl Future for WaitHandle<'_> {
+    type Output = ();
+
+    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+        match self.state {
+            State::Init | State::OnList => {
+                if self.as_mut().do_add_to_wait_list(Some(cx.waker())) {
+                    self.wait_until_off_list();
+                    Poll::Ready(())
+                } else {
+                    Poll::Pending
+                }
+            }
+            State::WakerSet => {
+                if !self.as_ref().wait_object().woken_up() {
+                    // If we read `woken_up == false`, this is guaranteed to be a spurious
+                    // wakeup. In that case we MUST still be on the wait list, so no
+                    // further action is required.
+                    Poll::Pending
+                } else {
+                    self.wait_until_off_list();
+                    self.set_state(State::WokenUp);
+                    Poll::Ready(())
+                }
+            }
+            State::WokenUp => Poll::Ready(()),
+        }
+    }
+}
+
+impl Drop for WaitHandle<'_> {
+    fn drop(&mut self) {
+        if matches!(self.state, State::Init | State::WokenUp) {
+            return;
+        }
+
+        let wait_object = self.wait_object();
+        if wait_object.woken_up() {
+            // We've been woken up by someone. It won't be long before they
+            // remove us from the list. So spin until we are off the list.
+            // And we're done.
+            self.wait_until_off_list();
+        } else {
+            // Lock the list and try again.
+            let mut waiters = self.wait_list.waiters.lock_irq();
+
+            if wait_object.on_list() {
+                let mut cursor = unsafe {
+                    // SAFETY: The list is locked so no one could be polling nodes
+                    //         off while we are trying to remove it.
+                    waiters.cursor_mut_from_ptr(wait_object)
+                };
+                assert!(cursor.remove().is_some());
+            }
+        }
+    }
+}

+ 107 - 0
crates/eonix_sync/src/wait_list/wait_object.rs

@@ -0,0 +1,107 @@
+use super::WaitList;
+use crate::Spin;
+use core::{
+    cell::UnsafeCell,
+    marker::PhantomPinned,
+    pin::Pin,
+    ptr::null_mut,
+    sync::atomic::{AtomicBool, AtomicPtr, Ordering},
+    task::Waker,
+};
+use intrusive_collections::{intrusive_adapter, LinkedListAtomicLink, UnsafeRef};
+
+intrusive_adapter!(
+    pub WaitObjectAdapter = Pin<UnsafeRef<WaitObject>>:
+    WaitObject { link: LinkedListAtomicLink }
+);
+
+pub struct WaitObject {
+    woken_up: AtomicBool,
+    /// The field `waker` is kept separate from its lock mainly to save space: we want
+    /// the object to fit into a cache line, and since `woken_up` takes only 1 byte,
+    /// the remaining 7 bytes can accommodate the extra byte required for the spinlock.
+    waker_lock: Spin<()>,
+    waker: UnsafeCell<Option<Waker>>,
+    wait_list: AtomicPtr<WaitList>,
+    link: LinkedListAtomicLink,
+    _pinned: PhantomPinned,
+}
+
+// SAFETY: `WaitObject` is `Sync` because we sync the `waker` access with a spinlock.
+unsafe impl Sync for WaitObject {}
+
+impl WaitObject {
+    pub const fn new(wait_list: &WaitList) -> Self {
+        Self {
+            woken_up: AtomicBool::new(false),
+            waker_lock: Spin::new(()),
+            waker: UnsafeCell::new(None),
+            wait_list: AtomicPtr::new(wait_list as *const _ as *mut _),
+            link: LinkedListAtomicLink::new(),
+            _pinned: PhantomPinned,
+        }
+    }
+
+    pub fn save_waker(&self, waker: Waker) {
+        let _lock = self.waker_lock.lock_irq();
+        unsafe {
+            // SAFETY: We're holding the waker lock.
+            let old_waker = (*self.waker.get()).replace(waker);
+            assert!(old_waker.is_none(), "Waker already set.");
+        }
+    }
+
+    /// Atomically save the waker if the wait object has not been woken up yet.
+    ///
+    /// # Returns
+    /// Whether the waker was saved.
+    pub fn save_waker_if_not_woken_up(&self, waker: &Waker) -> bool {
+        let _lock = self.waker_lock.lock_irq();
+        if self.woken_up() {
+            return false;
+        }
+
+        unsafe {
+            // SAFETY: We're holding the waker lock.
+            let old_waker = (*self.waker.get()).replace(waker.clone());
+            assert!(old_waker.is_none(), "Waker already set.");
+        }
+
+        true
+    }
+
+    pub fn take_waker(&self) -> Option<Waker> {
+        let _lock = self.waker_lock.lock_irq();
+        unsafe {
+            // SAFETY: We're holding the waker lock.
+            self.waker.get().as_mut().unwrap().take()
+        }
+    }
+
+    /// Check whether someone has woken up the wait object.
+    ///
+    /// Does an `Acquire` operation.
+    pub fn woken_up(&self) -> bool {
+        self.woken_up.load(Ordering::Acquire)
+    }
+
+    /// Set the wait object as woken up.
+    ///
+    /// Does a `Release` operation.
+    pub fn set_woken_up(&self) {
+        self.woken_up.store(true, Ordering::Release);
+    }
+
+    pub fn wait_list(&self) -> *const WaitList {
+        self.wait_list.load(Ordering::Acquire)
+    }
+
+    pub fn clear_wait_list(&self) {
+        self.wait_list.store(null_mut(), Ordering::Release);
+    }
+
+    pub fn on_list(&self) -> bool {
+        !self.wait_list.load(Ordering::Acquire).is_null()
+    }
+}
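
For reference, the wake side implied by this API boils down to publishing the flag and
then invoking the stored waker; the actual `WaitList::notify_*` in this branch also
unlinks the node from the list, which is elided in this sketch (same module assumed):

    fn wake(wait_object: &WaitObject) {
        // Publish the wakeup before taking the waker, so a concurrent
        // `save_waker_if_not_woken_up` either sees the flag and declines to
        // park, or has already stored a waker for us to wake below.
        wait_object.set_woken_up();

        // Hand the task back to the executor if a waker was stored.
        if let Some(waker) = wait_object.take_waker() {
            waker.wake();
        }
    }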

+ 6 - 0
crates/intrusive_list/Cargo.toml

@@ -0,0 +1,6 @@
+[package]
+name = "intrusive_list"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]

+ 59 - 0
crates/intrusive_list/src/lib.rs

@@ -0,0 +1,59 @@
+#![no_std]
+
+use core::ptr::NonNull;
+
+pub struct Link {
+    prev: Option<NonNull<Link>>,
+    next: Option<NonNull<Link>>,
+}
+
+impl Link {
+    pub const fn new() -> Self {
+        Self {
+            prev: None,
+            next: None,
+        }
+    }
+
+    pub fn insert(&mut self, node: &mut Self) {
+        unsafe {
+            let insert_node = NonNull::new(&raw mut *node);
+            if let Some(next) = self.next {
+                (*next.as_ptr()).prev = insert_node;
+            }
+            node.next = self.next;
+            node.prev = NonNull::new(&raw mut *self);
+            self.next = insert_node;
+        }
+    }
+
+    pub fn remove(&mut self) {
+        if let Some(next) = self.next {
+            unsafe { (*next.as_ptr()).prev = self.prev };
+        }
+
+        if let Some(prev) = self.prev {
+            unsafe { (*prev.as_ptr()).next = self.next };
+        }
+
+        self.prev = None;
+        self.next = None;
+    }
+
+    pub fn next(&self) -> Option<&Self> {
+        self.next.map(|node| unsafe { &*node.as_ptr() })
+    }
+
+    pub fn next_mut(&mut self) -> Option<&mut Self> {
+        self.next.map(|node| unsafe { &mut *node.as_ptr() })
+    }
+}
+
+#[macro_export]
+macro_rules! container_of {
+    ($ptr:expr, $type:ty, $($f:tt)*) => {{
+        let ptr = $ptr as *const _ as *const u8;
+        let offset: usize = ::core::mem::offset_of!($type, $($f)*);
+        ::core::ptr::NonNull::new_unchecked(ptr.sub(offset) as *mut $type)
+    }}
+}
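
A self-contained sketch of how `Link` and `container_of!` are meant to be combined; the
`Node` type here is illustrative and not part of the crate:

    use intrusive_list::{container_of, Link};

    struct Node {
        value: u32,
        link: Link,
    }

    fn walk() {
        let mut head = Link::new();
        let mut a = Node { value: 1, link: Link::new() };
        let mut b = Node { value: 2, link: Link::new() };

        // `insert` links the node right after `self`: head -> b -> a.
        head.insert(&mut a.link);
        head.insert(&mut b.link);

        let mut cursor = head.next();
        while let Some(link) = cursor {
            // Recover the owning `Node` from its embedded `link` field.
            let node = unsafe { container_of!(link, Node, link).as_ref() };
            let _ = node.value; // 2, then 1
            cursor = link.next();
        }
    }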

+ 6 - 0
crates/posix_types/Cargo.toml

@@ -0,0 +1,6 @@
+[package]
+name = "posix_types"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]

+ 4 - 0
crates/posix_types/src/lib.rs

@@ -0,0 +1,4 @@
+#![no_std]
+
+pub mod result;
+pub mod signal;

+ 1 - 0
crates/posix_types/src/result.rs

@@ -0,0 +1 @@
+pub enum PosixError {}

+ 3 - 0
crates/posix_types/src/signal.rs

@@ -0,0 +1,3 @@
+mod sig_action;
+
+pub use sig_action::{SigAction, TryFromSigAction};

+ 106 - 0
crates/posix_types/src/signal/sig_action.rs

@@ -0,0 +1,106 @@
+#[repr(C, packed)]
+#[derive(Debug, Clone, Copy)]
+pub struct SigAction {
+    sa_handler: u32,
+    sa_flags: u32,
+    sa_restorer: u32,
+    sa_mask: u64,
+}
+
+pub trait TryFromSigAction: Sized {
+    type Error;
+
+    fn default() -> Self;
+    fn ignore() -> Self;
+    fn new() -> Self;
+
+    fn set_siginfo(self) -> Result<Self, Self::Error>;
+    fn handler(self, handler: usize) -> Result<Self, Self::Error>;
+    fn restorer(self, restorer: usize) -> Result<Self, Self::Error>;
+    fn mask(self, mask: u64) -> Result<Self, Self::Error>;
+}
+
+const SIG_DFL: u32 = 0;
+const SIG_IGN: u32 = 1;
+
+const SA_SIGINFO: u32 = 4;
+const SA_RESTORER: u32 = 0x04000000;
+
+impl SigAction {
+    pub const fn default() -> Self {
+        Self {
+            sa_handler: SIG_DFL,
+            sa_flags: 0,
+            sa_restorer: 0,
+            sa_mask: 0,
+        }
+    }
+
+    pub const fn ignore() -> Self {
+        Self {
+            sa_handler: SIG_IGN,
+            sa_flags: 0,
+            sa_restorer: 0,
+            sa_mask: 0,
+        }
+    }
+
+    pub const fn new() -> Self {
+        Self {
+            sa_handler: 0,
+            sa_flags: 0,
+            sa_restorer: 0,
+            sa_mask: 0,
+        }
+    }
+
+    pub const fn handler(self, handler: usize) -> Self {
+        Self {
+            sa_handler: handler as u32,
+            ..self
+        }
+    }
+
+    pub const fn restorer(self, restorer: usize) -> Self {
+        Self {
+            sa_restorer: restorer as u32,
+            sa_flags: self.sa_flags | SA_RESTORER,
+            ..self
+        }
+    }
+
+    pub const fn mask(self, mask: u64) -> Self {
+        Self {
+            sa_mask: mask,
+            ..self
+        }
+    }
+
+    pub fn try_into<T>(self) -> Result<T, T::Error>
+    where
+        T: TryFromSigAction,
+    {
+        match self.sa_handler {
+            SIG_DFL => Ok(T::default()),
+            SIG_IGN => Ok(T::ignore()),
+            _ => {
+                let mut action = T::new();
+                if self.sa_flags & SA_SIGINFO != 0 {
+                    action = action.set_siginfo()?;
+                }
+
+                action = action.handler(self.sa_handler as usize)?;
+                action = action.restorer(self.sa_restorer as usize)?;
+                action = action.mask(self.sa_mask)?;
+
+                Ok(action)
+            }
+        }
+    }
+}
+
+impl Default for SigAction {
+    fn default() -> Self {
+        Self::default()
+    }
+}
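
A sketch of the consumer side of `TryFromSigAction`; `ParsedAction` is a hypothetical
kernel-side type used only to illustrate the builder-style contract (default/ignore/new
plus fallible setters) that `SigAction::try_into` drives:

    use posix_types::signal::{SigAction, TryFromSigAction};

    #[derive(Debug, Default)]
    struct ParsedAction {
        use_default: bool,
        ignored: bool,
        siginfo: bool,
        handler: usize,
        restorer: usize,
        mask: u64,
    }

    impl TryFromSigAction for ParsedAction {
        type Error = ();

        fn default() -> Self {
            ParsedAction { use_default: true, ..<Self as Default>::default() }
        }

        fn ignore() -> Self {
            ParsedAction { ignored: true, ..<Self as Default>::default() }
        }

        fn new() -> Self {
            <Self as Default>::default()
        }

        fn set_siginfo(mut self) -> Result<Self, ()> {
            self.siginfo = true;
            Ok(self)
        }

        fn handler(mut self, handler: usize) -> Result<Self, ()> {
            self.handler = handler;
            Ok(self)
        }

        fn restorer(mut self, restorer: usize) -> Result<Self, ()> {
            self.restorer = restorer;
            Ok(self)
        }

        fn mask(mut self, mask: u64) -> Result<Self, ()> {
            self.mask = mask;
            Ok(self)
        }
    }

    // Driving the conversion from a userspace-provided `SigAction`:
    //     let parsed: ParsedAction = raw_action.try_into()?;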

+ 1 - 1
rust-toolchain

@@ -1 +1 @@
-nightly
+nightly-2025-03-22

+ 3 - 5
src/driver/ahci/command.rs

@@ -1,8 +1,6 @@
-use crate::prelude::*;
-
-use crate::kernel::mem::paging::Page;
-
 use super::bindings::EINVAL;
+use crate::kernel::mem::paging::Page;
+use crate::prelude::*;
 
 pub trait Command {
     fn pages(&self) -> &[Page];
@@ -22,7 +20,7 @@ pub struct IdentifyCommand {
 impl IdentifyCommand {
     pub fn new() -> Self {
         Self {
-            page: Page::alloc_one(),
+            page: Page::alloc(),
         }
     }
 }

+ 48 - 0
src/driver/ahci/command_table.rs

@@ -0,0 +1,48 @@
+use super::{command::Command, PRDTEntry, FISH2D};
+use crate::kernel::mem::{AsMemoryBlock as _, Page};
+use eonix_mm::address::PAddr;
+
+pub struct CommandTable<'a> {
+    page: Page,
+    command_fis: &'a mut FISH2D,
+
+    prdt: &'a mut [PRDTEntry; 248],
+    prdt_entries: Option<u16>,
+}
+
+impl CommandTable<'_> {
+    pub fn new() -> Self {
+        let page = Page::alloc();
+        let memory = page.as_memblk();
+
+        let (lhs, prdt) = memory.split_at(0x80);
+
+        let (command_fis, _) = lhs.split_at(size_of::<FISH2D>());
+        let command_fis = unsafe { command_fis.as_ptr().as_mut() };
+        let prdt = unsafe { prdt.as_ptr().as_mut() };
+
+        Self {
+            page,
+            command_fis,
+            prdt,
+            prdt_entries: None,
+        }
+    }
+
+    pub fn setup(&mut self, cmd: &impl Command) {
+        self.command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count());
+        self.prdt_entries = Some(cmd.pages().len() as u16);
+
+        for (idx, page) in cmd.pages().iter().enumerate() {
+            self.prdt[idx].setup(page);
+        }
+    }
+
+    pub fn prdt_len(&self) -> u16 {
+        self.prdt_entries.unwrap()
+    }
+
+    pub fn base(&self) -> PAddr {
+        self.page.start()
+    }
+}
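
Layout note: the command table lives in a single page, with the command FIS at offset 0
and the PRDT at offset 0x80. Assuming the 16-byte PRDT entry size from the AHCI spec,
0x80 + 248 * 16 = 4096 bytes, so the 248-entry PRDT fills the rest of one 4 KiB page
exactly.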

+ 11 - 8
src/driver/ahci/control.rs

@@ -1,6 +1,7 @@
-use crate::kernel::mem::phys::{NoCachePP, PhysPtr};
-
 use super::{BitsIterator, GHC_IE};
+use crate::{kernel::mem::PhysAccess as _, sync::fence::memory_barrier};
+use core::ptr::NonNull;
+use eonix_mm::address::PAddr;
 
 /// An `AdapterControl` is an HBA device Global Host Control block
 ///
@@ -34,7 +35,7 @@ const CONTROL_IS: usize = 2;
 const CONTROL_PI: usize = 3;
 
 pub struct AdapterControl {
-    inner: *mut u32,
+    control_data: NonNull<u32>,
 }
 
 /// # Safety
@@ -42,25 +43,26 @@ pub struct AdapterControl {
 unsafe impl Send for AdapterControl {}
 
 impl AdapterControl {
-    pub fn new(addr: usize) -> Self {
+    pub fn new(addr: PAddr) -> Self {
         Self {
-            inner: NoCachePP::new(addr).as_ptr(),
+            control_data: unsafe { addr.as_ptr() },
         }
     }
 }
 
 impl AdapterControl {
     fn read(&self, off: usize) -> u32 {
-        unsafe { self.inner.offset(off as isize).read_volatile() }
+        unsafe { self.control_data.offset(off as isize).read_volatile() }
     }
 
     fn write(&self, off: usize, value: u32) {
-        unsafe { self.inner.offset(off as isize).write_volatile(value) }
+        unsafe { self.control_data.offset(off as isize).write_volatile(value) }
     }
 
     pub fn enable_interrupts(&self) {
         let ghc = self.read(CONTROL_GHC);
         self.write(CONTROL_GHC, ghc | GHC_IE);
+        memory_barrier();
     }
 
     pub fn implemented_ports(&self) -> BitsIterator {
@@ -72,6 +74,7 @@ impl AdapterControl {
     }
 
     pub fn clear_interrupt(&self, no: u32) {
-        self.write(CONTROL_IS, 1 << no)
+        self.write(CONTROL_IS, 1 << no);
+        memory_barrier();
     }
 }

+ 4 - 1
src/driver/ahci/defs.rs

@@ -1,6 +1,8 @@
 #![allow(dead_code)]
 
 use crate::kernel::mem::paging::Page;
+use eonix_mm::address::Addr as _;
+
 pub const VENDOR_INTEL: u16 = 0x8086;
 pub const DEVICE_AHCI: u16 = 0x2922;
 
@@ -51,6 +53,7 @@ pub const PORT_IS_ERROR: u32 =
 /// `clear_busy_upon_ok` and `bytes_transferred` are volatile
 ///
 #[repr(C)]
+#[derive(Clone, Copy)]
 pub struct CommandHeader {
     // [0:4]: Command FIS length
     // [5]: ATAPI
@@ -237,7 +240,7 @@ pub struct PRDTEntry {
 
 impl PRDTEntry {
     pub fn setup(&mut self, page: &Page) {
-        self.base = page.as_phys() as u64;
+        self.base = page.start().addr() as u64;
         self._reserved1 = 0;
 
         // The last bit MUST be set to 1 according to the AHCI spec

+ 63 - 65
src/driver/ahci/mod.rs

@@ -7,20 +7,27 @@ use crate::{
     },
     prelude::*,
 };
-
 use alloc::{format, sync::Arc};
 use bindings::{
     kernel::hw::pci::{self, pci_device},
     EIO,
 };
 use control::AdapterControl;
+use core::ptr::NonNull;
 use defs::*;
+use eonix_mm::address::{AddrOps as _, PAddr};
 use port::AdapterPort;
 
+pub(self) use register::Register;
+
 mod command;
+mod command_table;
 mod control;
 mod defs;
 mod port;
+mod register;
+pub(self) mod slot;
+mod stats;
 
 pub struct BitsIterator {
     data: u32,
@@ -53,70 +60,23 @@ impl Iterator for BitsIterator {
     }
 }
 
-fn vread<T: Sized + Copy>(refval: *const T) -> T {
-    unsafe { refval.read_volatile() }
-}
-
-fn vwrite<T: Sized + Copy>(refval: *mut T, val: T) {
-    unsafe { refval.write_volatile(val) }
-}
-
-#[allow(dead_code)]
-struct Device {
-    control_base: usize,
+struct Device<'a> {
+    control_base: PAddr,
     control: AdapterControl,
     // TODO: impl Drop to free pci device
-    pcidev: *mut pci_device,
+    pcidev: NonNull<pci_device>,
     /// # Lock
     /// Might be accessed from irq handler, use with `lock_irq()`
-    ports: Spin<[Option<Arc<AdapterPort>>; 32]>,
+    ports: Spin<[Option<Arc<AdapterPort<'a>>>; 32]>,
 }
 
 /// # Safety
 /// `pcidev` is never accessed from Rust code
 /// TODO!!!: place *mut pci_device in a safe wrapper
-unsafe impl Send for Device {}
-unsafe impl Sync for Device {}
-
-impl Device {
-    fn probe_ports(&self) -> KResult<()> {
-        for nport in self.control.implemented_ports() {
-            let port = Arc::new(AdapterPort::new(self.control_base, nport));
-            if !port.status_ok() {
-                continue;
-            }
-
-            self.ports.lock_irq()[nport as usize] = Some(port.clone());
-            if let Err(e) = (|| -> KResult<()> {
-                port.init()?;
-
-                {
-                    let port = port.clone();
-                    let name = format!("ahci-p{}-stats", port.nport);
-                    procfs::populate_root(name.into_bytes().into(), move |buffer| {
-                        writeln!(&mut buffer.get_writer(), "{:?}", port.stats.lock().as_ref())
-                            .map_err(|_| EIO)
-                    })?;
-                }
-
-                let port = BlockDevice::register_disk(
-                    make_device(8, nport * 16),
-                    2147483647, // TODO: get size from device
-                    port,
-                )?;
-
-                port.partprobe()?;
-
-                Ok(())
-            })() {
-                self.ports.lock_irq()[nport as usize] = None;
-                println_warn!("probe port {nport} failed with {e}");
-            }
-        }
-
-        Ok(())
-    }
+unsafe impl Send for Device<'_> {}
+unsafe impl Sync for Device<'_> {}
 
+impl Device<'_> {
     fn handle_interrupt(&self) {
         // Safety
         // `self.ports` is accessed inside irq handler
@@ -128,7 +88,7 @@ impl Device {
             }
 
             let port = ports[nport as usize].as_ref().unwrap();
-            let status = vread(port.interrupt_status());
+            let status = port.interrupt_status().read_once();
 
             if status & PORT_IS_ERROR != 0 {
                 println_warn!("port {nport} SATA error");
@@ -136,7 +96,7 @@ impl Device {
             }
 
             debug_assert!(status & PORT_IS_DHRS != 0);
-            vwrite(port.interrupt_status(), PORT_IS_DHRS);
+            port.interrupt_status().write_once(PORT_IS_DHRS);
 
             self.control.clear_interrupt(nport);
 
@@ -145,19 +105,20 @@ impl Device {
     }
 }
 
-impl Device {
-    pub fn new(pcidev: *mut pci_device) -> KResult<Arc<Self>> {
-        let base = unsafe { *(*pcidev).header_type0() }.bars[PCI_REG_ABAR];
-        let irqno = unsafe { *(*pcidev).header_type0() }.interrupt_line;
+impl Device<'static> {
+    pub fn new(pcidev: NonNull<pci_device>) -> KResult<Arc<Self>> {
+        let base =
+            PAddr::from(unsafe { *pcidev.as_ref().header_type0() }.bars[PCI_REG_ABAR] as usize);
+        let irqno = unsafe { *pcidev.as_ref().header_type0() }.interrupt_line;
 
         // use MMIO
-        if base & 0xf != 0 {
+        if !base.is_aligned_to(16) {
             return Err(EIO);
         }
 
         let device = Arc::new(Device {
-            control_base: base as usize,
-            control: AdapterControl::new(base as usize),
+            control_base: base,
+            control: AdapterControl::new(base),
             pcidev,
             ports: Spin::new([const { None }; 32]),
         });
@@ -171,10 +132,47 @@ impl Device {
 
         Ok(device)
     }
+
+    fn probe_ports(&self) -> KResult<()> {
+        for nport in self.control.implemented_ports() {
+            let port = Arc::new(AdapterPort::new(self.control_base, nport));
+            if !port.status_ok() {
+                continue;
+            }
+
+            self.ports.lock_irq()[nport as usize] = Some(port.clone());
+            if let Err(e) = (|| -> KResult<()> {
+                port.init()?;
+
+                {
+                    let port = port.clone();
+                    let name = format!("ahci-p{}-stats", port.nport);
+                    procfs::populate_root(name.into_bytes().into(), move |buffer| {
+                        port.print_stats(&mut buffer.get_writer())
+                    })?;
+                }
+
+                let port = BlockDevice::register_disk(
+                    make_device(8, nport * 16),
+                    2147483647, // TODO: get size from device
+                    port,
+                )?;
+
+                port.partprobe()?;
+
+                Ok(())
+            })() {
+                self.ports.lock_irq()[nport as usize] = None;
+                println_warn!("probe port {nport} failed with {e}");
+            }
+        }
+
+        Ok(())
+    }
 }
 
 unsafe extern "C" fn probe_device(pcidev: *mut pci_device) -> i32 {
-    match Device::new(pcidev) {
+    match Device::new(NonNull::new(pcidev).expect("NULL `pci_device` pointer")) {
         Ok(device) => {
             // TODO!!!: save device to pci_device
             Box::leak(Box::new(device));

+ 115 - 225
src/driver/ahci/port.rs

@@ -1,35 +1,20 @@
-use alloc::collections::vec_deque::VecDeque;
-use bindings::{EINVAL, EIO};
-use eonix_preempt::assert_preempt_enabled;
-
-use crate::prelude::*;
-
-use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue};
-use crate::kernel::mem::paging::Page;
-
-use crate::kernel::mem::phys::{NoCachePP, PhysPtr};
-use crate::sync::UCondVar;
-
 use super::command::{Command, IdentifyCommand, ReadLBACommand};
+use super::slot::CommandSlot;
+use super::stats::AdapterPortStats;
 use super::{
-    vread, vwrite, CommandHeader, PRDTEntry, FISH2D, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE,
-    PORT_CMD_ST, PORT_IE_DEFAULT,
+    CommandHeader, Register, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT,
 };
-
-fn spinwait_clear(refval: *const u32, mask: u32) -> KResult<()> {
-    const SPINWAIT_MAX: usize = 1000;
-
-    let mut spins = 0;
-    while vread(refval) & mask != 0 {
-        if spins == SPINWAIT_MAX {
-            return Err(EIO);
-        }
-
-        spins += 1;
-    }
-
-    Ok(())
-}
+use crate::driver::ahci::command_table::CommandTable;
+use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue};
+use crate::kernel::mem::paging::Page;
+use crate::kernel::mem::AsMemoryBlock as _;
+use crate::prelude::*;
+use alloc::collections::vec_deque::VecDeque;
+use bindings::{EINVAL, EIO};
+use core::pin::pin;
+use eonix_mm::address::{Addr as _, PAddr};
+use eonix_runtime::task::Task;
+use eonix_sync::WaitList;
 
 /// An `AdapterPort` is an HBA device in AHCI mode.
 ///
@@ -67,63 +52,6 @@ pub struct AdapterPortData {
     vendor: [u32; 4],
 }
 
-#[allow(dead_code)]
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-enum SlotState {
-    Idle,
-    Working,
-    Finished,
-    Error,
-}
-
-struct CommandSlotInner {
-    state: SlotState,
-    /// # Usage
-    /// `cmdheader` might be used in irq handler. So in order to wait for
-    /// commands to finish, we should use `lock_irq` on `cmdheader`
-    cmdheader: *mut CommandHeader,
-}
-
-/// # Safety
-/// This is safe because the `cmdheader` is not shared between threads
-unsafe impl Send for CommandSlotInner {}
-
-impl CommandSlotInner {
-    pub fn setup(&mut self, cmdtable_base: u64, prdtlen: u16, write: bool) {
-        let cmdheader = unsafe { self.cmdheader.as_mut().unwrap() };
-        cmdheader.first = 0x05; // FIS type
-
-        if write {
-            cmdheader.first |= 0x40;
-        }
-
-        cmdheader.second = 0x00;
-
-        cmdheader.prdt_length = prdtlen;
-        cmdheader.bytes_transferred = 0;
-        cmdheader.command_table_base = cmdtable_base;
-
-        cmdheader._reserved = [0; 4];
-    }
-}
-
-struct CommandSlot {
-    inner: Spin<CommandSlotInner>,
-    cv: UCondVar,
-}
-
-impl CommandSlot {
-    fn new(cmdheader: *mut CommandHeader) -> Self {
-        Self {
-            inner: Spin::new(CommandSlotInner {
-                state: SlotState::Idle,
-                cmdheader,
-            }),
-            cv: UCondVar::new(),
-        }
-    }
-}
-
 struct FreeList {
     free: VecDeque<u32>,
     working: VecDeque<u32>,
@@ -138,109 +66,111 @@ impl FreeList {
     }
 }
 
-#[derive(Default, Debug)]
-pub struct AdapterPortStats {
-    /// Number of commands sent
-    cmd_sent: u64,
+pub struct AdapterPort<'a> {
+    pub nport: u32,
+    regs_base: PAddr,
 
-    /// Number of transmission errors
-    cmd_error: u64,
+    slots: [CommandSlot<'a>; 32],
+    free_list: Spin<FreeList>,
+    free_list_wait: WaitList,
 
-    /// Number of interrupts fired
-    int_fired: u64,
-}
+    /// Holds the command list.
+    /// **DO NOT USE IT DIRECTLY**
+    _page: Page,
 
-pub struct AdapterPort {
-    pub nport: u32,
-    regs: *mut (),
-    page: Page,
-    slots: [CommandSlot; 32],
-    free_list: Spin<FreeList>,
-    free_list_cv: UCondVar,
+    cmdlist_base: PAddr,
+    fis_base: PAddr,
 
-    /// Statistics for this port
-    pub stats: Spin<AdapterPortStats>,
+    stats: AdapterPortStats,
 }
 
-/// # Safety
-/// This is safe because the `AdapterPort` can be accessed by only one thread at the same time
-unsafe impl Send for AdapterPort {}
-unsafe impl Sync for AdapterPort {}
-
-impl AdapterPort {
-    pub fn new(base: usize, nport: u32) -> Self {
-        let page = Page::alloc_one();
-        let cmdheaders_start = page.as_cached().as_ptr::<CommandHeader>();
+impl<'a> AdapterPort<'a> {
+    pub fn new(base: PAddr, nport: u32) -> Self {
+        let page = Page::alloc();
+        let cmdlist_base = page.start();
+        let cmdlist_size = 32 * size_of::<CommandHeader>();
+        let fis_base = cmdlist_base + cmdlist_size;
+
+        let (mut cmdheaders, _) = page.as_memblk().split_at(cmdlist_size);
+        let slots = core::array::from_fn(move |_| {
+            let (cmdheader, next) = cmdheaders.split_at(size_of::<CommandHeader>());
+            cmdheaders = next;
+            CommandSlot::new(unsafe { cmdheader.as_ptr().as_mut() })
+        });
 
         Self {
             nport,
-            regs: NoCachePP::new(base + 0x100 + 0x80 * nport as usize).as_ptr(),
-            slots: core::array::from_fn(|index| {
-                CommandSlot::new(unsafe { cmdheaders_start.offset(index as isize) })
-            }),
+            regs_base: base + 0x100 + 0x80 * nport as usize,
+            slots,
             free_list: Spin::new(FreeList::new()),
-            free_list_cv: UCondVar::new(),
-            page,
-            stats: Spin::default(),
+            free_list_wait: WaitList::new(),
+            _page: page,
+            stats: AdapterPortStats::new(),
+            cmdlist_base,
+            fis_base,
         }
     }
 }
 
-impl AdapterPort {
-    fn command_list_base(&self) -> *mut u64 {
-        unsafe { self.regs.byte_offset(0x00).cast() }
+impl AdapterPort<'_> {
+    fn command_list_base(&self) -> Register<u64> {
+        Register::new(self.regs_base + 0x00)
     }
 
-    fn fis_base(&self) -> *mut u64 {
-        unsafe { self.regs.byte_offset(0x08).cast() }
+    fn fis_base(&self) -> Register<u64> {
+        Register::new(self.regs_base + 0x08)
     }
 
-    fn sata_status(&self) -> *mut u32 {
-        unsafe { self.regs.byte_offset(0x28).cast() }
+    fn sata_status(&self) -> Register<u32> {
+        Register::new(self.regs_base + 0x28)
     }
 
-    fn command_status(&self) -> *mut u32 {
-        unsafe { self.regs.byte_offset(0x18).cast() }
+    fn command_status(&self) -> Register<u32> {
+        Register::new(self.regs_base + 0x18)
     }
 
-    fn command_issue(&self) -> *mut u32 {
-        unsafe { self.regs.byte_offset(0x38).cast() }
+    fn command_issue(&self) -> Register<u32> {
+        Register::new(self.regs_base + 0x38)
     }
 
-    pub fn interrupt_status(&self) -> *mut u32 {
-        unsafe { self.regs.byte_offset(0x10).cast() }
+    pub fn interrupt_status(&self) -> Register<u32> {
+        Register::new(self.regs_base + 0x10)
     }
 
-    pub fn interrupt_enable(&self) -> *mut u32 {
-        unsafe { self.regs.byte_offset(0x14).cast() }
+    fn interrupt_enable(&self) -> Register<u32> {
+        Register::new(self.regs_base + 0x14)
     }
 
     pub fn status_ok(&self) -> bool {
-        vread(self.sata_status()) & 0xf == 0x3
+        self.sata_status().read_once() & 0xf == 0x3
     }
 
     fn get_free_slot(&self) -> u32 {
-        let mut free_list = self.free_list.lock_irq();
-
         loop {
-            match free_list.free.pop_front() {
-                Some(slot) => break slot,
-                None => self.free_list_cv.wait(&mut free_list),
-            };
+            let mut free_list = self.free_list.lock_irq();
+            let free_slot = free_list.free.pop_front();
+            if let Some(slot) = free_slot {
+                return slot;
+            }
+            let mut wait = pin!(self.free_list_wait.prepare_to_wait());
+            wait.as_mut().add_to_wait_list();
+            drop(free_list);
+
+            Task::block_on(wait);
         }
     }
 
     fn save_working(&self, slot: u32) {
-        self.free_list.lock().working.push_back(slot);
+        self.free_list.lock_irq().working.push_back(slot);
     }
 
     fn release_free_slot(&self, slot: u32) {
-        self.free_list.lock().free.push_back(slot);
-        self.free_list_cv.notify_one();
+        self.free_list.lock_irq().free.push_back(slot);
+        self.free_list_wait.notify_one();
     }
 
     pub fn handle_interrupt(&self) {
-        let ci = vread(self.command_issue());
+        let ci = self.command_issue().read_once();
 
         // no need to use `lock_irq()` inside interrupt handler
         let mut free_list = self.free_list.lock();
@@ -250,104 +180,55 @@ impl AdapterPort {
                 return true;
             }
 
-            let slot = &self.slots[n as usize];
-
-            // TODO: check error
-            let mut slot_inner = slot.inner.lock();
-            debug_assert_eq!(slot_inner.state, SlotState::Working);
-            slot_inner.state = SlotState::Finished;
-            slot.cv.notify_all();
-            self.stats.lock().int_fired += 1;
+            self.slots[n as usize].handle_irq();
+            self.stats.inc_int_fired();
 
             false
         });
     }
 
     fn stop_command(&self) -> KResult<()> {
-        vwrite(
-            self.command_status(),
-            vread(self.command_status()) & !(PORT_CMD_ST | PORT_CMD_FRE),
-        );
-
-        spinwait_clear(self.command_status(), PORT_CMD_CR | PORT_CMD_FR)
+        let status_reg = self.command_status();
+        let status = status_reg.read();
+        status_reg.write_once(status & !(PORT_CMD_ST | PORT_CMD_FRE));
+        status_reg.spinwait_clear(PORT_CMD_CR | PORT_CMD_FR)
     }
 
     fn start_command(&self) -> KResult<()> {
-        spinwait_clear(self.command_status(), PORT_CMD_CR)?;
+        let status_reg = self.command_status();
+        status_reg.spinwait_clear(PORT_CMD_CR)?;
 
-        let cmd_status = vread(self.command_status());
-        vwrite(
-            self.command_status(),
-            cmd_status | PORT_CMD_ST | PORT_CMD_FRE,
-        );
+        let status = status_reg.read();
+        status_reg.write_once(status | PORT_CMD_ST | PORT_CMD_FRE);
 
         Ok(())
     }
 
-    /// # Might Sleep
-    /// This function **might sleep**, so call it in a preemptible context
     fn send_command(&self, cmd: &impl Command) -> KResult<()> {
-        assert_preempt_enabled!("AdapterPort::send_command");
-
-        let pages = cmd.pages();
-        let cmdtable_page = Page::alloc_one();
-
-        let command_fis: &mut FISH2D = cmdtable_page.as_cached().as_mut();
-        command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count());
+        let mut cmdtable = CommandTable::new();
+        cmdtable.setup(cmd);
 
-        let prdt: &mut [PRDTEntry; 248] = cmdtable_page.as_cached().offset(0x80).as_mut();
-
-        for (idx, page) in pages.iter().enumerate() {
-            prdt[idx].setup(page);
-        }
+        let slot_index = self.get_free_slot();
+        let slot = &self.slots[slot_index as usize];
 
-        let slot_index = self.get_free_slot() as usize;
-        let slot_object = &self.slots[slot_index];
+        slot.prepare_command(&cmdtable, cmd.write());
+        self.save_working(slot_index);
 
-        let mut slot = slot_object.inner.lock_irq();
-
-        slot.setup(
-            cmdtable_page.as_phys() as u64,
-            pages.len() as u16,
-            cmd.write(),
-        );
-        slot.state = SlotState::Working;
+        let cmdissue_reg = self.command_issue();
 
         // should we clear received fis here?
-        debug_assert!(vread(self.command_issue()) & (1 << slot_index) == 0);
-        vwrite(self.command_issue(), 1 << slot_index);
-
-        if spinwait_clear(self.command_issue(), 1 << slot_index).is_err() {
-            let mut saved = false;
-            while slot.state == SlotState::Working {
-                if !saved {
-                    saved = true;
-                    self.save_working(slot_index as u32);
-                }
-                slot_object.cv.wait(&mut slot);
-            }
-        } else {
-            // TODO: check error
-            slot.state = SlotState::Finished;
-        }
+        debug_assert!(cmdissue_reg.read_once() & (1 << slot_index) == 0);
+        cmdissue_reg.write_once(1 << slot_index);
 
-        let state = slot.state;
-        slot.state = SlotState::Idle;
+        self.stats.inc_cmd_sent();
 
-        debug_assert_ne!(state, SlotState::Working);
-        self.release_free_slot(slot_index as u32);
+        if let Err(_) = Task::block_on(slot.wait_finish()) {
+            self.stats.inc_cmd_error();
+            return Err(EIO);
+        };
 
-        match state {
-            SlotState::Finished => {
-                self.stats.lock().cmd_sent += 1;
-                Ok(())
-            }
-            SlotState::Error => {
-                self.stats.lock().cmd_error += 1;
-                Err(EIO)
-            }
-            _ => panic!("Invalid slot state"),
-        }
+        self.release_free_slot(slot_index);
+        Ok(())
     }
 
     fn identify(&self) -> KResult<()> {
@@ -362,10 +243,11 @@ impl AdapterPort {
     pub fn init(&self) -> KResult<()> {
         self.stop_command()?;
 
-        vwrite(self.interrupt_enable(), PORT_IE_DEFAULT);
+        self.command_list_base()
+            .write(self.cmdlist_base.addr() as u64);
+        self.fis_base().write(self.fis_base.addr() as u64);
 
-        vwrite(self.command_list_base(), self.page.as_phys() as u64);
-        vwrite(self.fis_base(), self.page.as_phys() as u64 + 0x400);
+        self.interrupt_enable().write_once(PORT_IE_DEFAULT);
 
         self.start_command()?;
 
@@ -377,9 +259,17 @@ impl AdapterPort {
             Ok(_) => Ok(()),
         }
     }
+
+    pub fn print_stats(&self, writer: &mut impl Write) -> KResult<()> {
+        writeln!(writer, "cmd_sent: {}", self.stats.get_cmd_sent()).map_err(|_| EIO)?;
+        writeln!(writer, "cmd_error: {}", self.stats.get_cmd_error()).map_err(|_| EIO)?;
+        writeln!(writer, "int_fired: {}", self.stats.get_int_fired()).map_err(|_| EIO)?;
+
+        Ok(())
+    }
 }
 
-impl BlockRequestQueue for AdapterPort {
+impl BlockRequestQueue for AdapterPort<'_> {
     fn max_request_pages(&self) -> u64 {
         1024
     }

+ 58 - 0
src/driver/ahci/register.rs

@@ -0,0 +1,58 @@
+use crate::{
+    kernel::{constants::EIO, mem::PhysAccess as _},
+    sync::fence::memory_barrier,
+    KResult,
+};
+use core::ptr::NonNull;
+use eonix_mm::address::PAddr;
+
+pub struct Register<T: Copy> {
+    addr: NonNull<T>,
+}
+
+unsafe impl<T: Copy> Send for Register<T> {}
+unsafe impl<T: Copy> Sync for Register<T> {}
+
+impl<T: Copy> Register<T> {
+    pub fn new(addr: PAddr) -> Self {
+        Self {
+            addr: unsafe { addr.as_ptr() },
+        }
+    }
+
+    pub fn read(&self) -> T {
+        unsafe { self.addr.as_ptr().read_volatile() }
+    }
+
+    pub fn write(&self, value: T) {
+        unsafe { self.addr.as_ptr().write_volatile(value) }
+    }
+
+    pub fn read_once(&self) -> T {
+        let val = unsafe { self.addr.as_ptr().read_volatile() };
+        memory_barrier();
+        val
+    }
+
+    pub fn write_once(&self, value: T) {
+        // Volatile write followed by a full memory barrier.
+        unsafe { self.addr.as_ptr().write_volatile(value) };
+        memory_barrier();
+    }
+}
+
+impl Register<u32> {
+    pub fn spinwait_clear(&self, mask: u32) -> KResult<()> {
+        const SPINWAIT_MAX: usize = 1000;
+
+        for _ in 0..SPINWAIT_MAX {
+            if self.read() & mask == 0 {
+                memory_barrier();
+                return Ok(());
+            }
+        }
+
+        memory_barrier();
+        Err(EIO)
+    }
+}
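
The `read`/`write` accessors are plain volatile accesses, while the `*_once` variants
additionally issue a full memory barrier. A minimal usage sketch, assuming the
surrounding driver's imports (`Register`, `PORT_CMD_*`, `KResult`) and mirroring
`AdapterPort::start_command` earlier in this diff:

    use eonix_mm::address::PAddr;

    fn start_engine(regs_base: PAddr) -> KResult<()> {
        // The port command/status register lives at offset 0x18 from the
        // port's register base.
        let cmd_reg = Register::<u32>::new(regs_base + 0x18);

        // Wait for the command list engine to go idle, then set ST | FRE and
        // publish the write with a barrier.
        cmd_reg.spinwait_clear(PORT_CMD_CR)?;
        cmd_reg.write_once(cmd_reg.read() | PORT_CMD_ST | PORT_CMD_FRE);

        Ok(())
    }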

+ 94 - 0
src/driver/ahci/slot.rs

@@ -0,0 +1,94 @@
+use super::{command_table::CommandTable, CommandHeader};
+use crate::KResult;
+use core::pin::pin;
+use eonix_mm::address::Addr as _;
+use eonix_sync::{Spin, WaitList};
+
+pub struct CommandSlot<'a> {
+    /// # Usage
+    /// `inner.cmdheader` might be accessed from the irq handler, so to wait
+    /// for commands to finish we must take `inner` with `lock_irq`.
+    inner: Spin<CommandSlotInner<'a>>,
+    wait_list: WaitList,
+}
+
+struct CommandSlotInner<'a> {
+    state: SlotState,
+    cmdheader: &'a mut CommandHeader,
+}
+
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+enum SlotState {
+    Idle,
+    Working,
+    Finished,
+    Error,
+}
+
+impl<'a> CommandSlot<'a> {
+    pub fn new(cmdheader: &'a mut CommandHeader) -> Self {
+        Self {
+            inner: Spin::new(CommandSlotInner {
+                state: SlotState::Idle,
+                cmdheader,
+            }),
+            wait_list: WaitList::new(),
+        }
+    }
+
+    pub fn handle_irq(&self) {
+        let mut inner = self.inner.lock();
+        debug_assert_eq!(inner.state, SlotState::Working);
+
+        // TODO: Check errors.
+        inner.state = SlotState::Finished;
+        inner.cmdheader.bytes_transferred = 0;
+        inner.cmdheader.prdt_length = 0;
+
+        self.wait_list.notify_all();
+    }
+
+    pub fn prepare_command(&self, cmdtable: &CommandTable, write: bool) {
+        let mut inner = self.inner.lock_irq();
+        let cmdheader = &mut inner.cmdheader;
+
+        cmdheader.first = 0x05; // FIS type
+
+        if write {
+            cmdheader.first |= 0x40;
+        }
+
+        cmdheader.second = 0x00;
+
+        cmdheader.prdt_length = cmdtable.prdt_len();
+        cmdheader.bytes_transferred = 0;
+        cmdheader.command_table_base = cmdtable.base().addr() as u64;
+
+        cmdheader._reserved = [0; 4];
+
+        inner.state = SlotState::Working;
+    }
+
+    pub async fn wait_finish(&self) -> KResult<()> {
+        let mut inner = loop {
+            let inner = self.inner.lock_irq();
+            if inner.state != SlotState::Working {
+                break inner;
+            }
+
+            let mut wait = pin!(self.wait_list.prepare_to_wait());
+            wait.as_mut().add_to_wait_list();
+
+            if inner.state != SlotState::Working {
+                break inner;
+            }
+
+            drop(inner);
+            wait.await;
+        };
+
+        inner.state = SlotState::Idle;
+
+        Ok(())
+    }
+}

+ 46 - 0
src/driver/ahci/stats.rs

@@ -0,0 +1,46 @@
+use core::sync::atomic::{AtomicUsize, Ordering};
+
+pub struct AdapterPortStats {
+    /// Number of commands sent
+    cmd_sent: AtomicUsize,
+
+    /// Number of transmission errors
+    cmd_error: AtomicUsize,
+
+    /// Number of interrupts fired
+    int_fired: AtomicUsize,
+}
+
+impl AdapterPortStats {
+    pub const fn new() -> Self {
+        Self {
+            cmd_sent: AtomicUsize::new(0),
+            cmd_error: AtomicUsize::new(0),
+            int_fired: AtomicUsize::new(0),
+        }
+    }
+
+    pub fn inc_int_fired(&self) {
+        self.int_fired.fetch_add(1, Ordering::Relaxed);
+    }
+
+    pub fn inc_cmd_sent(&self) {
+        self.cmd_sent.fetch_add(1, Ordering::Relaxed);
+    }
+
+    pub fn inc_cmd_error(&self) {
+        self.cmd_error.fetch_add(1, Ordering::Relaxed);
+    }
+
+    pub fn get_int_fired(&self) -> usize {
+        self.int_fired.load(Ordering::Relaxed)
+    }
+
+    pub fn get_cmd_sent(&self) -> usize {
+        self.cmd_sent.load(Ordering::Relaxed)
+    }
+
+    pub fn get_cmd_error(&self) -> usize {
+        self.cmd_error.load(Ordering::Relaxed)
+    }
+}

+ 434 - 432
src/driver/e1000e.rs

@@ -1,437 +1,439 @@
-use crate::prelude::*;
-
-use crate::bindings::root::kernel::hw::pci;
-use crate::kernel::interrupt::register_irq_handler;
-use crate::kernel::mem::{paging, phys};
-use crate::net::netdev;
-use alloc::boxed::Box;
-use alloc::vec::Vec;
-use bindings::EFAULT;
-use paging::Page;
-use phys::{NoCachePP, PhysPtr};
-
-use crate::bindings::root::{EAGAIN, EINVAL, EIO};
-
-mod defs;
-
-#[repr(C)]
-struct RxDescriptor {
-    buffer: u64,
-    length: u16,
-    checksum: u16,
-    status: u8,
-    errors: u8,
-    vlan: u16,
-}
-
-#[repr(C)]
-struct TxDescriptor {
-    buffer: u64,
-    length: u16,
-    cso: u8, // Checksum offset
-    cmd: u8,
-    status: u8,
-    css: u8, // Checksum start
-    vlan: u16,
-}
-
-const RX_DESC_SIZE: usize = 32;
-const TX_DESC_SIZE: usize = 32;
-
-struct E1000eDev {
-    mac: netdev::Mac,
-    status: netdev::LinkStatus,
-    speed: netdev::LinkSpeed,
-    id: u32,
-
-    base: NoCachePP,
-    rt_desc_page: Page,
-    rx_head: Option<u32>,
-    rx_tail: Option<u32>,
-    tx_tail: Option<u32>,
-
-    rx_buffers: Option<Box<Vec<Page>>>,
-    tx_buffers: Option<Box<Vec<Page>>>,
-}
-
-fn test(val: u32, bit: u32) -> bool {
-    (val & bit) == bit
-}
-
-struct PrintableBytes<'a>(&'a [u8]);
-
-impl core::fmt::Debug for PrintableBytes<'_> {
-    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-        write!(f, "PrintableBytes {{")?;
-        for chunk in self.0.chunks(16) {
-            for &byte in chunk {
-                write!(f, "{byte} ")?;
-            }
-            write!(f, "\n")?;
-        }
-        write!(f, "}}")?;
-
-        Ok(())
-    }
-}
-
-impl netdev::Netdev for E1000eDev {
-    fn mac(&self) -> netdev::Mac {
-        self.mac
-    }
-
-    fn link_status(&self) -> netdev::LinkStatus {
-        self.status
-    }
-
-    fn link_speed(&self) -> netdev::LinkSpeed {
-        self.speed
-    }
-
-    fn id(&self) -> u32 {
-        self.id
-    }
-
-    fn up(&mut self) -> Result<(), u32> {
-        let ctrl = self.read(defs::REG_CTRL);
-        let status = self.read(defs::REG_STAT);
-
-        // check link up
-        if !test(ctrl, defs::CTRL_SLU) || !test(status, defs::STAT_LU) {
-            return Err(EIO);
-        }
-
-        // auto negotiation of speed
-        match status & defs::STAT_SPEED_MASK {
-            defs::STAT_SPEED_10M => self.speed = netdev::LinkSpeed::Speed10M,
-            defs::STAT_SPEED_100M => self.speed = netdev::LinkSpeed::Speed100M,
-            defs::STAT_SPEED_1000M => self.speed = netdev::LinkSpeed::Speed1000M,
-            _ => return Err(EINVAL),
-        }
-
-        // clear multicast table
-        for i in (0..128).step_by(4) {
-            self.write(defs::REG_MTA + i, 0);
-        }
-
-        self.clear_stats()?;
-
-        // setup interrupt handler
-        let device = netdev::get_netdev(self.id).unwrap();
-        let handler = move || {
-            device.lock().fire().unwrap();
-        };
-
-        register_irq_handler(0xb, handler)?;
-
-        // enable interrupts
-        self.write(defs::REG_IMS, defs::ICR_NORMAL | defs::ICR_UP);
-
-        // read to clear any pending interrupts
-        self.read(defs::REG_ICR);
-
-        self.setup_rx()?;
-        self.setup_tx()?;
-
-        self.status = netdev::LinkStatus::Up;
-
-        Ok(())
-    }
-
-    fn fire(&mut self) -> Result<(), u32> {
-        let cause = self.read(defs::REG_ICR);
-        if !test(cause, defs::ICR_INT) {
-            return Ok(());
-        }
-
-        loop {
-            let tail = self.rx_tail.ok_or(EIO)?;
-            let next_tail = (tail + 1) % RX_DESC_SIZE as u32;
-
-            if next_tail == self.read(defs::REG_RDH) {
-                break;
-            }
-
-            let ref mut desc = self.rx_desc_table()[next_tail as usize];
-            if !test(desc.status as u32, defs::RXD_STAT_DD as u32) {
-                Err(EIO)?;
-            }
-
-            desc.status = 0;
-            let len = desc.length as usize;
-
-            let buffers = self.rx_buffers.as_mut().ok_or(EIO)?;
-            let data = &buffers[next_tail as usize].as_slice()[..len];
-
-            println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data));
-            self.rx_tail = Some(next_tail);
-        }
-
-        Ok(())
-    }
-
-    fn send(&mut self, buf: &[u8]) -> Result<(), u32> {
-        let tail = self.tx_tail.ok_or(EIO)?;
-        let head = self.read(defs::REG_TDH);
-        let next_tail = (tail + 1) % TX_DESC_SIZE as u32;
-
-        if next_tail == head {
-            return Err(EAGAIN);
-        }
-
-        let ref mut desc = self.tx_desc_table()[tail as usize];
-        if !test(desc.status as u32, defs::TXD_STAT_DD as u32) {
-            return Err(EIO);
-        }
-
-        let buffer_page = Page::alloc_one();
-        if buf.len() > buffer_page.len() {
-            return Err(EFAULT);
-        }
-        buffer_page.as_mut_slice()[..buf.len()].copy_from_slice(buf);
-
-        desc.buffer = buffer_page.as_phys() as u64;
-        desc.length = buf.len() as u16;
-        desc.cmd = defs::TXD_CMD_EOP | defs::TXD_CMD_IFCS | defs::TXD_CMD_RS;
-        desc.status = 0;
-
-        self.tx_tail = Some(next_tail);
-        self.write(defs::REG_TDT, next_tail);
-
-        // TODO: check if the packets are sent and update self.tx_head state
-
-        Ok(())
-    }
-}
-
-impl E1000eDev {
-    fn setup_rx(&mut self) -> Result<(), u32> {
-        if !self.rx_head.is_none() || !self.rx_tail.is_none() {
-            return Err(EINVAL);
-        }
-
-        let addr = self.rt_desc_page.as_phys();
-
-        self.write(defs::REG_RDBAL, addr as u32);
-        self.write(defs::REG_RDBAH, (addr >> 32) as u32);
-
-        self.write(
-            defs::REG_RDLEN,
-            (RX_DESC_SIZE * size_of::<RxDescriptor>()) as u32,
-        );
-
-        self.write(defs::REG_RDH, 0);
-        self.write(defs::REG_RDT, RX_DESC_SIZE as u32 - 1);
-
-        self.rx_head = Some(0);
-        self.rx_tail = Some(RX_DESC_SIZE as u32 - 1);
-
-        self.write(
-            defs::REG_RCTL,
-            defs::RCTL_EN
-                | defs::RCTL_MPE
-                | defs::RCTL_LPE
-                | defs::RCTL_LBM_NO
-                | defs::RCTL_DTYP_LEGACY
-                | defs::RCTL_BAM
-                | defs::RCTL_BSIZE_8192
-                | defs::RCTL_SECRC,
-        );
-
-        Ok(())
-    }
-
-    fn setup_tx(&mut self) -> Result<(), u32> {
-        if !self.tx_tail.is_none() {
-            return Err(EINVAL);
-        }
-
-        let addr = self.rt_desc_page.as_phys() + 0x200;
-
-        self.write(defs::REG_TDBAL, addr as u32);
-        self.write(defs::REG_TDBAH, (addr >> 32) as u32);
-
-        self.write(
-            defs::REG_TDLEN,
-            (TX_DESC_SIZE * size_of::<TxDescriptor>()) as u32,
-        );
-
-        self.write(defs::REG_TDH, 0);
-        self.write(defs::REG_TDT, 0);
-
-        self.tx_tail = Some(0);
-
-        self.write(
-            defs::REG_TCTL,
-            defs::TCTL_EN
-                | defs::TCTL_PSP
-                | (15 << defs::TCTL_CT_SHIFT)
-                | (64 << defs::TCTL_COLD_SHIFT)
-                | defs::TCTL_RTLC,
-        );
-
-        Ok(())
-    }
-
-    fn reset(&self) -> Result<(), u32> {
-        // disable interrupts so we won't mess things up
-        self.write(defs::REG_IMC, 0xffffffff);
-
-        let ctrl = self.read(defs::REG_CTRL);
-        self.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD);
-
-        while self.read(defs::REG_STAT) & defs::STAT_GIOE != 0 {
-            // wait for link up
-        }
-
-        let ctrl = self.read(defs::REG_CTRL);
-        self.write(defs::REG_CTRL, ctrl | defs::CTRL_RST);
-
-        while self.read(defs::REG_CTRL) & defs::CTRL_RST != 0 {
-            // wait for reset
-        }
-
-        // disable interrupts again
-        self.write(defs::REG_IMC, 0xffffffff);
-
-        Ok(())
-    }
-
-    fn clear_stats(&self) -> Result<(), u32> {
-        self.write(defs::REG_COLC, 0);
-        self.write(defs::REG_GPRC, 0);
-        self.write(defs::REG_MPRC, 0);
-        self.write(defs::REG_GPTC, 0);
-        self.write(defs::REG_GORCL, 0);
-        self.write(defs::REG_GORCH, 0);
-        self.write(defs::REG_GOTCL, 0);
-        self.write(defs::REG_GOTCH, 0);
-        Ok(())
-    }
-
-    pub fn new(base: NoCachePP) -> Result<Self, u32> {
-        let page = Page::alloc_one();
-
-        page.zero();
-
-        let mut dev = Self {
-            mac: [0; 6],
-            status: netdev::LinkStatus::Down,
-            speed: netdev::LinkSpeed::SpeedUnknown,
-            id: netdev::alloc_id(),
-            base,
-            rt_desc_page: page,
-            rx_head: None,
-            rx_tail: None,
-            tx_tail: None,
-            rx_buffers: None,
-            tx_buffers: None,
-        };
-
-        dev.reset()?;
-
-        dev.mac = unsafe { dev.base.offset(0x5400).as_ptr::<[u8; 6]>().read() };
-        dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE)));
-
-        let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE));
-
-        for index in 0..RX_DESC_SIZE {
-            let page = Page::alloc_many(2);
-
-            let ref mut desc = dev.rx_desc_table()[index];
-            desc.buffer = page.as_phys() as u64;
-            desc.status = 0;
-
-            rx_buffers.push(page);
-        }
-
-        for index in 0..TX_DESC_SIZE {
-            let ref mut desc = dev.tx_desc_table()[index];
-            desc.status = defs::TXD_STAT_DD;
-        }
-
-        dev.rx_buffers = Some(rx_buffers);
-
-        Ok(dev)
-    }
-
-    fn read(&self, offset: u32) -> u32 {
-        unsafe {
-            self.base
-                .offset(offset as isize)
-                .as_ptr::<u32>()
-                .read_volatile()
-        }
-    }
-
-    fn write(&self, offset: u32, value: u32) {
-        unsafe {
-            self.base
-                .offset(offset as isize)
-                .as_ptr::<u32>()
-                .write_volatile(value)
-        }
-    }
-
-    fn rx_desc_table<'lt>(&'lt self) -> &'lt mut [RxDescriptor; RX_DESC_SIZE] {
-        self.rt_desc_page.as_cached().as_mut()
-    }
-
-    fn tx_desc_table<'lt>(&'lt self) -> &'lt mut [TxDescriptor; TX_DESC_SIZE] {
-        self.rt_desc_page.as_cached().offset(0x200).as_mut()
-    }
-}
-
-impl Drop for E1000eDev {
-    fn drop(&mut self) {
-        assert_eq!(self.status, netdev::LinkStatus::Down);
-
-        if let Some(_) = self.rx_buffers.take() {}
-
-        // TODO: we should wait until all packets are sent
-        if let Some(_) = self.tx_buffers.take() {}
-
-        let _ = self.rt_desc_page;
-    }
-}
-
-impl pci::pci_device {
-    fn header0(&self) -> &pci::device_header_type0 {
-        unsafe { self.header_type0().as_ref() }.unwrap()
-    }
-}
-
-fn do_probe_device(dev: &mut pci::pci_device) -> Result<(), u32> {
-    let bar0 = dev.header0().bars[0];
-
-    if bar0 & 0xf != 0 {
-        return Err(EINVAL);
-    }
-
-    unsafe { dev.enableBusMastering() };
-
-    let base = NoCachePP::new((bar0 & !0xf) as usize);
-    let e1000e = E1000eDev::new(base)?;
-
-    netdev::register_netdev(e1000e)?;
-
-    Ok(())
-}
-
-unsafe extern "C" fn probe_device(dev: *mut pci::pci_device) -> i32 {
-    let dev = dev.as_mut().unwrap();
-    match do_probe_device(dev) {
-        Ok(_) => 0,
-        Err(e) => -(e as i32),
-    }
-}
+// use crate::prelude::*;
+//
+// use crate::bindings::root::kernel::hw::pci;
+// use crate::kernel::interrupt::register_irq_handler;
+// use crate::kernel::mem::{paging, phys};
+// use crate::net::netdev;
+// use alloc::boxed::Box;
+// use alloc::vec::Vec;
+// use bindings::EFAULT;
+// use paging::Page;
+// use phys::{NoCachePP, PhysPtr};
+//
+// use crate::bindings::root::{EAGAIN, EINVAL, EIO};
+//
+// mod defs;
+//
+// #[repr(C)]
+// struct RxDescriptor {
+//     buffer: u64,
+//     length: u16,
+//     checksum: u16,
+//     status: u8,
+//     errors: u8,
+//     vlan: u16,
+// }
+//
+// #[repr(C)]
+// struct TxDescriptor {
+//     buffer: u64,
+//     length: u16,
+//     cso: u8, // Checksum offset
+//     cmd: u8,
+//     status: u8,
+//     css: u8, // Checksum start
+//     vlan: u16,
+// }
+//
+// const RX_DESC_SIZE: usize = 32;
+// const TX_DESC_SIZE: usize = 32;
+//
+// struct E1000eDev {
+//     mac: netdev::Mac,
+//     status: netdev::LinkStatus,
+//     speed: netdev::LinkSpeed,
+//     id: u32,
+//
+//     base: NoCachePP,
+//     rt_desc_page: Page,
+//     rx_head: Option<u32>,
+//     rx_tail: Option<u32>,
+//     tx_tail: Option<u32>,
+//
+//     rx_buffers: Option<Box<Vec<Page>>>,
+//     tx_buffers: Option<Box<Vec<Page>>>,
+// }
+//
+// fn test(val: u32, bit: u32) -> bool {
+//     (val & bit) == bit
+// }
+//
+// struct PrintableBytes<'a>(&'a [u8]);
+//
+// impl core::fmt::Debug for PrintableBytes<'_> {
+//     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+//         write!(f, "PrintableBytes {{")?;
+//         for chunk in self.0.chunks(16) {
+//             for &byte in chunk {
+//                 write!(f, "{byte} ")?;
+//             }
+//             write!(f, "\n")?;
+//         }
+//         write!(f, "}}")?;
+//
+//         Ok(())
+//     }
+// }
+//
+// impl netdev::Netdev for E1000eDev {
+//     fn mac(&self) -> netdev::Mac {
+//         self.mac
+//     }
+//
+//     fn link_status(&self) -> netdev::LinkStatus {
+//         self.status
+//     }
+//
+//     fn link_speed(&self) -> netdev::LinkSpeed {
+//         self.speed
+//     }
+//
+//     fn id(&self) -> u32 {
+//         self.id
+//     }
+//
+//     fn up(&mut self) -> Result<(), u32> {
+//         let ctrl = self.read(defs::REG_CTRL);
+//         let status = self.read(defs::REG_STAT);
+//
+//         // check link up
+//         if !test(ctrl, defs::CTRL_SLU) || !test(status, defs::STAT_LU) {
+//             return Err(EIO);
+//         }
+//
+//         // auto negotiation of speed
+//         match status & defs::STAT_SPEED_MASK {
+//             defs::STAT_SPEED_10M => self.speed = netdev::LinkSpeed::Speed10M,
+//             defs::STAT_SPEED_100M => self.speed = netdev::LinkSpeed::Speed100M,
+//             defs::STAT_SPEED_1000M => self.speed = netdev::LinkSpeed::Speed1000M,
+//             _ => return Err(EINVAL),
+//         }
+//
+//         // clear multicast table
+//         for i in (0..128).step_by(4) {
+//             self.write(defs::REG_MTA + i, 0);
+//         }
+//
+//         self.clear_stats()?;
+//
+//         // setup interrupt handler
+//         let device = netdev::get_netdev(self.id).unwrap();
+//         let handler = move || {
+//             eonix_runtime::task::Task::block_on(device.lock())
+//                 .fire()
+//                 .unwrap();
+//         };
+//
+//         register_irq_handler(0xb, handler)?;
+//
+//         // enable interrupts
+//         self.write(defs::REG_IMS, defs::ICR_NORMAL | defs::ICR_UP);
+//
+//         // read to clear any pending interrupts
+//         self.read(defs::REG_ICR);
+//
+//         self.setup_rx()?;
+//         self.setup_tx()?;
+//
+//         self.status = netdev::LinkStatus::Up;
+//
+//         Ok(())
+//     }
+//
+//     fn fire(&mut self) -> Result<(), u32> {
+//         let cause = self.read(defs::REG_ICR);
+//         if !test(cause, defs::ICR_INT) {
+//             return Ok(());
+//         }
+//
+//         loop {
+//             let tail = self.rx_tail.ok_or(EIO)?;
+//             let next_tail = (tail + 1) % RX_DESC_SIZE as u32;
+//
+//             if next_tail == self.read(defs::REG_RDH) {
+//                 break;
+//             }
+//
+//             let ref mut desc = self.rx_desc_table()[next_tail as usize];
+//             if !test(desc.status as u32, defs::RXD_STAT_DD as u32) {
+//                 Err(EIO)?;
+//             }
+//
+//             desc.status = 0;
+//             let len = desc.length as usize;
+//
+//             let buffers = self.rx_buffers.as_mut().ok_or(EIO)?;
+//             let data = &buffers[next_tail as usize].as_slice()[..len];
+//
+//             println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data));
+//             self.rx_tail = Some(next_tail);
+//         }
+//
+//         Ok(())
+//     }
+//
+//     fn send(&mut self, buf: &[u8]) -> Result<(), u32> {
+//         let tail = self.tx_tail.ok_or(EIO)?;
+//         let head = self.read(defs::REG_TDH);
+//         let next_tail = (tail + 1) % TX_DESC_SIZE as u32;
+//
+//         if next_tail == head {
+//             return Err(EAGAIN);
+//         }
+//
+//         let ref mut desc = self.tx_desc_table()[tail as usize];
+//         if !test(desc.status as u32, defs::TXD_STAT_DD as u32) {
+//             return Err(EIO);
+//         }
+//
+//         let buffer_page = Page::alloc_one();
+//         if buf.len() > buffer_page.len() {
+//             return Err(EFAULT);
+//         }
+//         buffer_page.as_mut_slice()[..buf.len()].copy_from_slice(buf);
+//
+//         desc.buffer = buffer_page.as_phys() as u64;
+//         desc.length = buf.len() as u16;
+//         desc.cmd = defs::TXD_CMD_EOP | defs::TXD_CMD_IFCS | defs::TXD_CMD_RS;
+//         desc.status = 0;
+//
+//         self.tx_tail = Some(next_tail);
+//         self.write(defs::REG_TDT, next_tail);
+//
+//         // TODO: check if the packets are sent and update self.tx_head state
+//
+//         Ok(())
+//     }
+// }
+//
+// impl E1000eDev {
+//     fn setup_rx(&mut self) -> Result<(), u32> {
+//         if !self.rx_head.is_none() || !self.rx_tail.is_none() {
+//             return Err(EINVAL);
+//         }
+//
+//         let addr = self.rt_desc_page.as_phys();
+//
+//         self.write(defs::REG_RDBAL, addr as u32);
+//         self.write(defs::REG_RDBAH, (addr >> 32) as u32);
+//
+//         self.write(
+//             defs::REG_RDLEN,
+//             (RX_DESC_SIZE * size_of::<RxDescriptor>()) as u32,
+//         );
+//
+//         self.write(defs::REG_RDH, 0);
+//         self.write(defs::REG_RDT, RX_DESC_SIZE as u32 - 1);
+//
+//         self.rx_head = Some(0);
+//         self.rx_tail = Some(RX_DESC_SIZE as u32 - 1);
+//
+//         self.write(
+//             defs::REG_RCTL,
+//             defs::RCTL_EN
+//                 | defs::RCTL_MPE
+//                 | defs::RCTL_LPE
+//                 | defs::RCTL_LBM_NO
+//                 | defs::RCTL_DTYP_LEGACY
+//                 | defs::RCTL_BAM
+//                 | defs::RCTL_BSIZE_8192
+//                 | defs::RCTL_SECRC,
+//         );
+//
+//         Ok(())
+//     }
+//
+//     fn setup_tx(&mut self) -> Result<(), u32> {
+//         if !self.tx_tail.is_none() {
+//             return Err(EINVAL);
+//         }
+//
+//         let addr = self.rt_desc_page.as_phys() + 0x200;
+//
+//         self.write(defs::REG_TDBAL, addr as u32);
+//         self.write(defs::REG_TDBAH, (addr >> 32) as u32);
+//
+//         self.write(
+//             defs::REG_TDLEN,
+//             (TX_DESC_SIZE * size_of::<TxDescriptor>()) as u32,
+//         );
+//
+//         self.write(defs::REG_TDH, 0);
+//         self.write(defs::REG_TDT, 0);
+//
+//         self.tx_tail = Some(0);
+//
+//         self.write(
+//             defs::REG_TCTL,
+//             defs::TCTL_EN
+//                 | defs::TCTL_PSP
+//                 | (15 << defs::TCTL_CT_SHIFT)
+//                 | (64 << defs::TCTL_COLD_SHIFT)
+//                 | defs::TCTL_RTLC,
+//         );
+//
+//         Ok(())
+//     }
+//
+//     fn reset(&self) -> Result<(), u32> {
+//         // disable interrupts so we won't mess things up
+//         self.write(defs::REG_IMC, 0xffffffff);
+//
+//         let ctrl = self.read(defs::REG_CTRL);
+//         self.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD);
+//
+//         while self.read(defs::REG_STAT) & defs::STAT_GIOE != 0 {
+//             // wait for link up
+//         }
+//
+//         let ctrl = self.read(defs::REG_CTRL);
+//         self.write(defs::REG_CTRL, ctrl | defs::CTRL_RST);
+//
+//         while self.read(defs::REG_CTRL) & defs::CTRL_RST != 0 {
+//             // wait for reset
+//         }
+//
+//         // disable interrupts again
+//         self.write(defs::REG_IMC, 0xffffffff);
+//
+//         Ok(())
+//     }
+//
+//     fn clear_stats(&self) -> Result<(), u32> {
+//         self.write(defs::REG_COLC, 0);
+//         self.write(defs::REG_GPRC, 0);
+//         self.write(defs::REG_MPRC, 0);
+//         self.write(defs::REG_GPTC, 0);
+//         self.write(defs::REG_GORCL, 0);
+//         self.write(defs::REG_GORCH, 0);
+//         self.write(defs::REG_GOTCL, 0);
+//         self.write(defs::REG_GOTCH, 0);
+//         Ok(())
+//     }
+//
+//     pub fn new(base: NoCachePP) -> Result<Self, u32> {
+//         let page = Page::alloc_one();
+//
+//         page.zero();
+//
+//         let mut dev = Self {
+//             mac: [0; 6],
+//             status: netdev::LinkStatus::Down,
+//             speed: netdev::LinkSpeed::SpeedUnknown,
+//             id: netdev::alloc_id(),
+//             base,
+//             rt_desc_page: page,
+//             rx_head: None,
+//             rx_tail: None,
+//             tx_tail: None,
+//             rx_buffers: None,
+//             tx_buffers: None,
+//         };
+//
+//         dev.reset()?;
+//
+//         dev.mac = unsafe { dev.base.offset(0x5400).as_ptr::<[u8; 6]>().read() };
+//         dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE)));
+//
+//         let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE));
+//
+//         for index in 0..RX_DESC_SIZE {
+//             let page = Page::alloc_many(2);
+//
+//             let ref mut desc = dev.rx_desc_table()[index];
+//             desc.buffer = page.as_phys() as u64;
+//             desc.status = 0;
+//
+//             rx_buffers.push(page);
+//         }
+//
+//         for index in 0..TX_DESC_SIZE {
+//             let ref mut desc = dev.tx_desc_table()[index];
+//             desc.status = defs::TXD_STAT_DD;
+//         }
+//
+//         dev.rx_buffers = Some(rx_buffers);
+//
+//         Ok(dev)
+//     }
+//
+//     fn read(&self, offset: u32) -> u32 {
+//         unsafe {
+//             self.base
+//                 .offset(offset as isize)
+//                 .as_ptr::<u32>()
+//                 .read_volatile()
+//         }
+//     }
+//
+//     fn write(&self, offset: u32, value: u32) {
+//         unsafe {
+//             self.base
+//                 .offset(offset as isize)
+//                 .as_ptr::<u32>()
+//                 .write_volatile(value)
+//         }
+//     }
+//
+//     fn rx_desc_table<'lt>(&'lt self) -> &'lt mut [RxDescriptor; RX_DESC_SIZE] {
+//         self.rt_desc_page.as_cached().as_mut()
+//     }
+//
+//     fn tx_desc_table<'lt>(&'lt self) -> &'lt mut [TxDescriptor; TX_DESC_SIZE] {
+//         self.rt_desc_page.as_cached().offset(0x200).as_mut()
+//     }
+// }
+//
+// impl Drop for E1000eDev {
+//     fn drop(&mut self) {
+//         assert_eq!(self.status, netdev::LinkStatus::Down);
+//
+//         if let Some(_) = self.rx_buffers.take() {}
+//
+//         // TODO: we should wait until all packets are sent
+//         if let Some(_) = self.tx_buffers.take() {}
+//
+//         let _ = self.rt_desc_page;
+//     }
+// }
+//
+// impl pci::pci_device {
+//     fn header0(&self) -> &pci::device_header_type0 {
+//         unsafe { self.header_type0().as_ref() }.unwrap()
+//     }
+// }
+//
+// fn do_probe_device(dev: &mut pci::pci_device) -> Result<(), u32> {
+//     let bar0 = dev.header0().bars[0];
+//
+//     if bar0 & 0xf != 0 {
+//         return Err(EINVAL);
+//     }
+//
+//     unsafe { dev.enableBusMastering() };
+//
+//     let base = NoCachePP::new((bar0 & !0xf) as usize);
+//     let e1000e = E1000eDev::new(base)?;
+//
+//     netdev::register_netdev(e1000e)?;
+//
+//     Ok(())
+// }
+//
+// unsafe extern "C" fn probe_device(dev: *mut pci::pci_device) -> i32 {
+//     let dev = dev.as_mut().unwrap();
+//     match do_probe_device(dev) {
+//         Ok(_) => 0,
+//         Err(e) => -(e as i32),
+//     }
+// }
 
 pub fn register_e1000e_driver() {
-    let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a];
+    // let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a];
 
-    for id in dev_ids.into_iter() {
-        let ret = unsafe { pci::register_driver_r(0x8086, id, Some(probe_device)) };
+    // for id in dev_ids.into_iter() {
+    //     let ret = unsafe { pci::register_driver_r(0x8086, id, Some(probe_device)) };
 
-        assert_eq!(ret, 0);
-    }
+    //     assert_eq!(ret, 0);
+    // }
 }

+ 25 - 15
src/driver/serial.rs

@@ -5,11 +5,12 @@ use crate::{
         task::KernelStack, CharDevice, CharDeviceType, Terminal, TerminalDevice,
     },
     prelude::*,
-    sync::UCondVar,
 };
 use alloc::{collections::vec_deque::VecDeque, format, sync::Arc};
 use bitflags::bitflags;
+use core::pin::pin;
 use eonix_runtime::{run::FutureRun, scheduler::Scheduler};
+use eonix_sync::WaitList;
 
 bitflags! {
     struct LineStatus: u8 {
@@ -24,7 +25,7 @@ struct Serial {
     name: Arc<str>,
 
     terminal: Spin<Option<Arc<Terminal>>>,
-    cv_worker: UCondVar,
+    worker_wait: WaitList,
 
     working: Spin<bool>,
     tx_buffer: Spin<VecDeque<u8>>,
@@ -53,7 +54,7 @@ impl Serial {
 
     fn disable_interrupts(&self) {
         // Disable interrupt #0: Received data available
-        self.int_ena.write(0x00);
+        self.int_ena.write(0x02);
     }
 
     fn line_status(&self) -> LineStatus {
@@ -61,13 +62,18 @@ impl Serial {
     }
 
     async fn wait_for_interrupt(&self) {
-        let mut working = self.working.lock_irq();
-        self.enable_interrupts();
-        *working = false;
+        let mut wait = pin!(self.worker_wait.prepare_to_wait());
 
-        self.cv_worker.async_wait(&mut working).await;
+        {
+            let mut working = self.working.lock_irq();
+            self.enable_interrupts();
+            wait.as_mut().add_to_wait_list();
+            *working = false;
+        };
+
+        wait.await;
 
-        *working = true;
+        *self.working.lock_irq() = true;
         self.disable_interrupts();
     }
 
@@ -79,30 +85,30 @@ impl Serial {
                 let ch = port.tx_rx.read();
 
                 if let Some(terminal) = terminal.as_ref() {
-                    terminal.commit_char(ch);
+                    terminal.commit_char(ch).await;
                 }
             }
 
             let should_wait = {
                 let mut tx_buffer = port.tx_buffer.lock();
+                let mut count = 0;
 
                 // Give it a chance to receive data.
-                let count = tx_buffer.len().min(64);
-                for ch in tx_buffer.drain(..count) {
+                for &ch in tx_buffer.iter().take(64) {
                     if port.line_status().contains(LineStatus::TX_READY) {
                         port.tx_rx.write(ch);
                     } else {
                         break;
                     }
+                    count += 1;
                 }
+                tx_buffer.drain(..count);
 
                 tx_buffer.is_empty()
             };
 
             if should_wait {
                 port.wait_for_interrupt().await;
-            } else {
-                Scheduler::yield_now().await;
             }
         }
     }
@@ -112,7 +118,7 @@ impl Serial {
             id,
             name: Arc::from(format!("ttyS{id}")),
             terminal: Spin::new(None),
-            cv_worker: UCondVar::new(),
+            worker_wait: WaitList::new(),
             working: Spin::new(true),
             tx_buffer: Spin::new(VecDeque::new()),
             tx_rx: Port8::new(base_port),
@@ -146,7 +152,7 @@ impl Serial {
     fn wakeup_worker(&self) {
         let working = self.working.lock_irq();
         if !*working {
-            self.cv_worker.notify_all();
+            self.worker_wait.notify_one();
         }
     }
 
@@ -196,6 +202,10 @@ impl TerminalDevice for Serial {
         tx_buffer.push_back(ch);
         self.wakeup_worker();
     }
+
+    fn putchar_direct(&self, ch: u8) {
+        self.tx_rx.write(ch);
+    }
 }
 
 pub fn init() -> KResult<()> {
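
The worker loop above now parks on a WaitList instead of a UCondVar. A minimal sketch of the same prepare/enqueue/await pattern, assuming `Spin` and `WaitList` are reachable from `eonix_sync` as in this diff's imports; the `Worker` type and its method names are illustrative only:

    use core::pin::pin;
    use eonix_sync::{Spin, WaitList};

    struct Worker {
        working: Spin<bool>,
        wait: WaitList,
    }

    impl Worker {
        async fn park(&self) {
            // Register on the wait list *before* publishing `working = false`,
            // so a wake-up arriving between the unlock and the await is not lost.
            let mut wait = pin!(self.wait.prepare_to_wait());
            {
                let mut working = self.working.lock_irq();
                wait.as_mut().add_to_wait_list();
                *working = false;
            }
            wait.await;
            *self.working.lock_irq() = true;
        }

        fn wake(&self) {
            let working = self.working.lock_irq();
            if !*working {
                self.wait.notify_one();
            }
        }
    }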

+ 11 - 11
src/elf.rs

@@ -1,15 +1,15 @@
-use alloc::{ffi::CString, sync::Arc};
-use bitflags::bitflags;
-
 use crate::{
     io::{ByteBuffer, UninitBuffer},
     kernel::{
         constants::ENOEXEC,
-        mem::{FileMapping, MMList, Mapping, Permission, VAddr},
+        mem::{FileMapping, MMList, Mapping, Permission},
         vfs::dentry::Dentry,
     },
     prelude::*,
 };
+use alloc::{ffi::CString, sync::Arc};
+use bitflags::bitflags;
+use eonix_mm::address::{Addr as _, AddrOps as _, VAddr};
 
 #[repr(u8)]
 #[allow(dead_code)]
@@ -244,13 +244,13 @@ impl ParsedElf32 {
     pub fn load(self, args: Vec<CString>, envs: Vec<CString>) -> KResult<(VAddr, VAddr, MMList)> {
         let mm_list = MMList::new();
 
-        let mut data_segment_end = VAddr(0);
+        let mut data_segment_end = VAddr::NULL;
         for phent in self
             .phents
             .into_iter()
             .filter(|ent| ent.ph_type == Elf32PhType::Load)
         {
-            let vaddr_start = VAddr(phent.vaddr as usize);
+            let vaddr_start = VAddr::from(phent.vaddr as usize);
             let vmem_vaddr_end = vaddr_start + phent.mem_size as usize;
             let load_vaddr_end = vaddr_start + phent.file_size as usize;
 
@@ -296,8 +296,8 @@ impl ParsedElf32 {
 
         // Map stack area
         mm_list.mmap_fixed(
-            VAddr(0xc0000000 - 0x800000), // Stack bottom is at 0xc0000000
-            0x800000,                     // 8MB stack size
+            VAddr::from(0xc0000000 - 0x800000), // Stack bottom is at 0xc0000000
+            0x800000,                           // 8MB stack size
             Mapping::Anonymous,
             Permission {
                 write: true,
@@ -319,7 +319,7 @@ impl ParsedElf32 {
         longs.push(0); // AT_NULL
 
         sp = sp - longs.len() * size_of::<u32>();
-        sp = VAddr::from(usize::from(sp) & !0xf); // Align to 16 bytes
+        sp = sp.floor_to(16);
 
         mm_list.access_mut(sp, longs.len() * size_of::<u32>(), |offset, data| {
             data.copy_from_slice(unsafe {
@@ -330,7 +330,7 @@ impl ParsedElf32 {
             })
         })?;
 
-        Ok((VAddr(self.entry as usize), sp, mm_list))
+        Ok((VAddr::from(self.entry as usize), sp, mm_list))
     }
 }
 
@@ -342,7 +342,7 @@ fn push_strings(mm_list: &MMList, sp: &mut VAddr, strings: Vec<CString>) -> KRes
         mm_list.access_mut(*sp, len, |offset, data| {
             data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()])
         })?;
-        addrs.push(usize::from(*sp) as u32);
+        addrs.push(sp.addr() as u32);
     }
 
     Ok(addrs)
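
The address arithmetic above moves to the eonix_mm::address traits. A rough sketch of the replacements, inferred only from the call sites changed in this diff; the helper function itself is hypothetical:

    use eonix_mm::address::{Addr as _, AddrOps as _, VAddr};

    fn stack_pointer_example() -> u32 {
        let _null = VAddr::NULL;                        // replaces VAddr(0)
        let stack_bottom = VAddr::from(0xc0000000usize - 0x800000);
        let sp = (stack_bottom + 0x800000usize)         // VAddr + usize, as in the loader above
            .floor_to(16);                              // replaces `usize::from(sp) & !0xf`
        sp.addr() as u32                                // replaces `usize::from(*sp) as u32`
    }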

+ 18 - 19
src/fs/fat32.rs

@@ -1,14 +1,5 @@
-use core::{ops::ControlFlow, sync::atomic::Ordering};
-
-use alloc::{
-    collections::btree_map::BTreeMap,
-    sync::{Arc, Weak},
-    vec::Vec,
-};
-use bindings::EIO;
-
-use dir::Dirs as _;
-use file::ClusterRead;
+mod dir;
+mod file;
 
 use crate::{
     io::{Buffer, ByteBuffer, UninitBuffer},
@@ -27,9 +18,17 @@ use crate::{
     prelude::*,
     KResult,
 };
-
-mod dir;
-mod file;
+use alloc::{
+    collections::btree_map::BTreeMap,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use bindings::EIO;
+use core::{ops::ControlFlow, sync::atomic::Ordering};
+use dir::Dirs as _;
+use eonix_runtime::task::Task;
+use eonix_sync::RwLock;
+use file::ClusterRead;
 
 type ClusterNo = u32;
 
@@ -79,7 +78,7 @@ struct FatFs {
     volume_label: [u8; 11],
 
     device: Arc<BlockDevice>,
-    fat: RwSemaphore<Vec<ClusterNo>>,
+    fat: RwLock<Vec<ClusterNo>>,
     weak: Weak<FatFs>,
     icache: BTreeMap<Ino, FatInode>,
 }
@@ -135,7 +134,7 @@ impl FatFs {
             sectors_per_cluster: 0,
             rootdir_cluster: 0,
             data_start: 0,
-            fat: RwSemaphore::new(Vec::new()),
+            fat: RwLock::new(Vec::new()),
             weak: weak.clone(),
             icache: BTreeMap::new(),
             volume_label: [0; 11],
@@ -247,7 +246,7 @@ impl Inode for FileInode {
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
-        let fat = vfs.fat.lock_shared();
+        let fat = Task::block_on(vfs.fat.read());
 
         if self.size.load(Ordering::Relaxed) as usize == 0 {
             return Ok(0);
@@ -288,7 +287,7 @@ impl Inode for DirInode {
     fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
-        let fat = vfs.fat.lock_shared();
+        let fat = Task::block_on(vfs.fat.read());
 
         let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo)
             .read(vfs, 0)
@@ -319,7 +318,7 @@ impl Inode for DirInode {
     ) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
-        let fat = vfs.fat.lock_shared();
+        let fat = Task::block_on(vfs.fat.read());
 
         let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo)
             .read(vfs, offset)
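
The FAT table above changes from a RwSemaphore to an async eonix_sync::RwLock, and the synchronous Inode methods bridge into it with Task::block_on. A condensed sketch of that bridging, using only names visible in this diff; the FatTable type is illustrative:

    use alloc::vec::Vec;
    use eonix_runtime::task::Task;
    use eonix_sync::RwLock;

    struct FatTable {
        fat: RwLock<Vec<u32>>, // ClusterNo == u32
    }

    impl FatTable {
        fn next_cluster(&self, cluster: u32) -> Option<u32> {
            // Block the current (synchronous) task on the async read guard,
            // as the FAT32 inode methods now do.
            let fat = Task::block_on(self.fat.read());
            let table: &[u32] = fat.as_ref();
            table.get(cluster as usize).copied()
        }
    }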

+ 10 - 4
src/fs/fat32/file.rs

@@ -1,6 +1,8 @@
-use crate::{kernel::mem::Page, KResult};
-
 use super::{ClusterIterator, FatFs};
+use crate::{
+    kernel::mem::{AsMemoryBlock as _, Page},
+    KResult,
+};
 
 pub trait ClusterReadIterator<'data>: Iterator<Item = KResult<&'data [u8]>> + 'data {}
 impl<'a, I> ClusterReadIterator<'a> for I where I: Iterator<Item = KResult<&'a [u8]>> + 'a {}
@@ -22,11 +24,15 @@ impl<'data, 'fat: 'data> ClusterRead<'data> for ClusterIterator<'fat> {
         let skip_clusters = offset / cluster_size;
         let mut inner_offset = offset % cluster_size;
 
-        let buffer_page = Page::alloc_one();
+        // TODO: Use block cache.
+        let buffer_page = Page::alloc();
 
         self.skip(skip_clusters).map(move |cluster| {
             vfs.read_cluster(cluster, &buffer_page)?;
-            let data = &buffer_page.as_slice()[inner_offset..];
+            let data = unsafe {
+                // SAFETY: No one could be writing to it.
+                &buffer_page.as_memblk().as_bytes()[inner_offset..]
+            };
             inner_offset = 0;
             Ok(data)
         })

+ 28 - 34
src/fs/procfs.rs

@@ -1,17 +1,8 @@
-use alloc::{
-    collections::btree_map::BTreeMap,
-    sync::{Arc, Weak},
-};
-use bindings::{EACCES, ENOTDIR};
-use core::{ops::ControlFlow, sync::atomic::Ordering};
-use itertools::Itertools;
-use lazy_static::lazy_static;
-
 use crate::{
     io::Buffer,
     kernel::{
         constants::{S_IFDIR, S_IFREG},
-        mem::paging::{Page, PageBuffer},
+        mem::paging::PageBuffer,
         vfs::{
             dentry::Dentry,
             inode::{define_struct_inode, AtomicIno, Ino, Inode, InodeData},
@@ -21,8 +12,13 @@ use crate::{
         },
     },
     prelude::*,
-    sync::{AsRefMutPosition as _, AsRefPosition as _, Locked},
 };
+use alloc::sync::{Arc, Weak};
+use bindings::{EACCES, ENOTDIR};
+use core::{ops::ControlFlow, sync::atomic::Ordering};
+use eonix_runtime::task::Task;
+use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked};
+use itertools::Itertools;
 
 fn split_len_offset(data: &[u8], len: usize, offset: usize) -> Option<&[u8]> {
     let real_data = data.split_at_checked(len).map(|(data, _)| data)?;
@@ -104,10 +100,13 @@ impl Inode for FileInode {
             return Err(EACCES);
         }
 
-        let mut page_buffer = PageBuffer::new(Page::alloc_one());
-        let nread = self.file.read(&mut page_buffer)?;
+        let mut page_buffer = PageBuffer::new();
+        self.file.read(&mut page_buffer)?;
 
-        let data = split_len_offset(page_buffer.as_slice(), nread, offset);
+        let data = page_buffer
+            .data()
+            .split_at_checked(offset)
+            .map(|(_, data)| data);
 
         match data {
             None => Ok(0),
@@ -134,10 +133,10 @@ impl DirInode {
 
 impl Inode for DirInode {
     fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
-        let lock = self.rwsem.lock_shared();
+        let lock = Task::block_on(self.rwsem.read());
         Ok(self
             .entries
-            .access(lock.as_pos())
+            .access(lock.prove())
             .iter()
             .find_map(|(name, node)| {
                 name.as_ref()
@@ -151,9 +150,9 @@ impl Inode for DirInode {
         offset: usize,
         callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
     ) -> KResult<usize> {
-        let lock = self.rwsem.lock_shared();
+        let lock = Task::block_on(self.rwsem.read());
         self.entries
-            .access(lock.as_pos())
+            .access(lock.prove())
             .iter()
             .skip(offset)
             .map(|(name, node)| callback(name.as_ref(), node.ino()))
@@ -183,17 +182,12 @@ impl Vfs for ProcFs {
     }
 }
 
-lazy_static! {
-    static ref ICACHE: Spin<BTreeMap<Ino, ProcFsNode>> = Spin::new(BTreeMap::new());
-    static ref GLOBAL_PROCFS: Arc<ProcFs> = {
-        let fs: Arc<ProcFs> = Arc::new_cyclic(|weak: &Weak<ProcFs>| ProcFs {
-            root_node: DirInode::new(0, weak.clone()),
-            next_ino: AtomicIno::new(1),
-        });
-
-        fs
-    };
-}
+static GLOBAL_PROCFS: LazyLock<Arc<ProcFs>> = LazyLock::new(|| {
+    Arc::new_cyclic(|weak: &Weak<ProcFs>| ProcFs {
+        root_node: DirInode::new(0, weak.clone()),
+        next_ino: AtomicIno::new(1),
+    })
+});
 
 struct ProcFsMountCreator;
 
@@ -239,10 +233,10 @@ pub fn creat(
     let inode = FileInode::new(ino, Arc::downgrade(&fs), file);
 
     {
-        let lock = parent.idata.rwsem.lock();
+        let lock = Task::block_on(parent.idata.rwsem.write());
         parent
             .entries
-            .access_mut(lock.as_pos_mut())
+            .access_mut(lock.prove_mut())
             .push((name, ProcFsNode::File(inode.clone())));
     }
 
@@ -263,7 +257,7 @@ pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult<ProcFsNode> {
 
     parent
         .entries
-        .access_mut(inode.rwsem.lock().as_pos_mut())
+        .access_mut(Task::block_on(inode.rwsem.write()).prove_mut())
         .push((Arc::from(name), ProcFsNode::Dir(inode.clone())));
 
     Ok(ProcFsNode::Dir(inode))
@@ -278,7 +272,7 @@ impl ProcFsFile for DumpMountsFile {
     fn read(&self, buffer: &mut PageBuffer) -> KResult<usize> {
         dump_mounts(&mut buffer.get_writer());
 
-        Ok(buffer.len())
+        Ok(buffer.data().len())
     }
 }
 
@@ -309,7 +303,7 @@ where
     }
 
     fn read(&self, buffer: &mut PageBuffer) -> KResult<usize> {
-        self.read_fn.as_ref().ok_or(EACCES)?(buffer).map(|_| buffer.len())
+        self.read_fn.as_ref().ok_or(EACCES)?(buffer).map(|_| buffer.data().len())
     }
 }
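
lazy_static! blocks give way to eonix_sync::LazyLock statics, as in the GLOBAL_PROCFS change above. A small sketch of the same construction; the Counter type is illustrative, and LazyLock is assumed to dereference to its value the way std's does:

    use alloc::sync::Arc;
    use eonix_sync::LazyLock;

    struct Counter {
        start: usize,
    }

    // The closure runs on first access, like the old lazy_static! initializer.
    static GLOBAL_COUNTER: LazyLock<Arc<Counter>> =
        LazyLock::new(|| Arc::new(Counter { start: 1 }));

    fn first_ino() -> usize {
        GLOBAL_COUNTER.start
    }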
 

+ 28 - 28
src/fs/tmpfs.rs

@@ -1,8 +1,3 @@
-use alloc::sync::{Arc, Weak};
-use bindings::{EINVAL, EIO, EISDIR};
-use core::{ops::ControlFlow, sync::atomic::Ordering};
-use itertools::Itertools;
-
 use crate::{
     io::Buffer,
     kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG},
@@ -15,8 +10,13 @@ use crate::{
         DevId,
     },
     prelude::*,
-    sync::{AsRefMutPosition as _, AsRefPosition as _, Locked, RefMutPosition},
 };
+use alloc::sync::{Arc, Weak};
+use bindings::{EINVAL, EIO, EISDIR};
+use core::{ops::ControlFlow, sync::atomic::Ordering};
+use eonix_runtime::task::Task;
+use eonix_sync::{AsProof as _, AsProofMut as _, Locked, ProofMut};
+use itertools::Itertools;
 
 fn acquire(vfs: &Weak<dyn Vfs>) -> KResult<Arc<dyn Vfs>> {
     vfs.upgrade().ok_or(EIO)
@@ -69,7 +69,7 @@ impl DirectoryInode {
         })
     }
 
-    fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: RefMutPosition<'_, ()>) {
+    fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: ProofMut<'_, ()>) {
         // SAFETY: Only `unlink` will do something based on `nlink` count
         //         No need to synchronize here
         file.nlink.fetch_add(1, Ordering::Relaxed);
@@ -87,9 +87,9 @@ impl Inode for DirectoryInode {
         offset: usize,
         callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
     ) -> KResult<usize> {
-        let lock = self.rwsem.lock_shared();
+        let lock = Task::block_on(self.rwsem.read());
         self.entries
-            .access(lock.as_pos())
+            .access(lock.prove())
             .iter()
             .skip(offset)
             .map(|(name, ino)| callback(&name, *ino))
@@ -102,12 +102,12 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let rwsem = self.rwsem.lock();
+        let rwsem = Task::block_on(self.rwsem.write());
 
         let ino = vfs.assign_ino();
         let file = FileInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.as_pos_mut());
+        self.link(at.name().clone(), file.as_ref(), rwsem.prove_mut());
         at.save_reg(file)
     }
 
@@ -119,7 +119,7 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let rwsem = self.rwsem.lock();
+        let rwsem = Task::block_on(self.rwsem.write());
 
         let ino = vfs.assign_ino();
         let file = NodeInode::new(
@@ -129,7 +129,7 @@ impl Inode for DirectoryInode {
             dev,
         );
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.as_pos_mut());
+        self.link(at.name().clone(), file.as_ref(), rwsem.prove_mut());
         at.save_reg(file)
     }
 
@@ -137,12 +137,12 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let rwsem = self.rwsem.lock();
+        let rwsem = Task::block_on(self.rwsem.write());
 
         let ino = vfs.assign_ino();
         let file = SymlinkInode::new(ino, self.vfs.clone(), target.into());
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.as_pos_mut());
+        self.link(at.name().clone(), file.as_ref(), rwsem.prove_mut());
         at.save_symlink(file)
     }
 
@@ -150,29 +150,29 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let rwsem = self.rwsem.lock();
+        let rwsem = Task::block_on(self.rwsem.write());
 
         let ino = vfs.assign_ino();
         let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(at.name().clone(), newdir.as_ref(), rwsem.as_pos_mut());
+        self.link(at.name().clone(), newdir.as_ref(), rwsem.prove_mut());
         at.save_dir(newdir)
     }
 
     fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
         let _vfs = acquire(&self.vfs)?;
 
-        let dlock = self.rwsem.lock();
+        let dlock = Task::block_on(self.rwsem.write());
 
         let file = at.get_inode()?;
-        let _flock = file.rwsem.lock();
+        let _flock = file.rwsem.write();
 
         // SAFETY: `flock` has done the synchronization
         if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 {
             return Err(EISDIR);
         }
 
-        let entries = self.entries.access_mut(dlock.as_pos_mut());
+        let entries = self.entries.access_mut(dlock.prove_mut());
         entries.retain(|(_, ino)| *ino != file.ino);
 
         assert_eq!(
@@ -206,7 +206,7 @@ impl Inode for DirectoryInode {
 
     fn chmod(&self, mode: Mode) -> KResult<()> {
         let _vfs = acquire(&self.vfs)?;
-        let _lock = self.rwsem.lock();
+        let _lock = Task::block_on(self.rwsem.write());
 
         // SAFETY: `rwsem` has done the synchronization
         let old = self.mode.load(Ordering::Relaxed);
@@ -266,9 +266,9 @@ impl FileInode {
 impl Inode for FileInode {
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let lock = self.rwsem.lock_shared();
+        let lock = Task::block_on(self.rwsem.read());
 
-        match self.filedata.access(lock.as_pos()).split_at_checked(offset) {
+        match self.filedata.access(lock.prove()).split_at_checked(offset) {
             Some((_, data)) => buffer.fill(data).map(|result| result.allow_partial()),
             None => Ok(0),
         }
@@ -276,8 +276,8 @@ impl Inode for FileInode {
 
     fn write(&self, buffer: &[u8], offset: WriteOffset) -> KResult<usize> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let lock = self.rwsem.lock();
-        let filedata = self.filedata.access_mut(lock.as_pos_mut());
+        let lock = Task::block_on(self.rwsem.write());
+        let filedata = self.filedata.access_mut(lock.prove_mut());
 
         let offset = match offset {
             WriteOffset::Position(offset) => offset,
@@ -304,8 +304,8 @@ impl Inode for FileInode {
 
     fn truncate(&self, length: usize) -> KResult<()> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let lock = self.rwsem.lock();
-        let filedata = self.filedata.access_mut(lock.as_pos_mut());
+        let lock = Task::block_on(self.rwsem.write());
+        let filedata = self.filedata.access_mut(lock.prove_mut());
 
         // SAFETY: `lock` has done the synchronization
         self.size.store(length as u64, Ordering::Relaxed);
@@ -316,7 +316,7 @@ impl Inode for FileInode {
 
     fn chmod(&self, mode: Mode) -> KResult<()> {
         let _vfs = acquire(&self.vfs)?;
-        let _lock = self.rwsem.lock();
+        let _lock = Task::block_on(self.rwsem.write());
 
         // SAFETY: `rwsem` has done the synchronization
         let old = self.mode.load(Ordering::Relaxed);
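
The tmpfs directory code now passes lock "proofs" instead of AsRefPosition markers: a guard taken from the owning rwsem is converted with prove()/prove_mut() and handed to code that requires the lock to be held. A rough sketch of that shape, assuming only the ProofMut, AsProofMut and RwLock items imported above; the helper names are hypothetical:

    use eonix_runtime::task::Task;
    use eonix_sync::{AsProofMut as _, ProofMut, RwLock};

    // Mirrors how `DirectoryInode::link` takes a `ProofMut<'_, ()>` above:
    // the caller must already hold the directory rwsem for writing.
    fn with_dir_locked(_proof: ProofMut<'_, ()>) {
        // ... mutate state guarded by the directory rwsem ...
    }

    fn example(rwsem: &RwLock<()>) {
        let guard = Task::block_on(rwsem.write());
        with_dir_locked(guard.prove_mut());
        // Read-side callers use `Task::block_on(rwsem.read()).prove()` instead.
    }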

+ 40 - 4
src/io.rs

@@ -1,8 +1,6 @@
-use bindings::EFAULT;
-
 use crate::prelude::*;
-
-use core::mem::MaybeUninit;
+use bindings::EFAULT;
+use core::{cmp, mem::MaybeUninit};
 
 #[must_use]
 pub enum FillResult {
@@ -187,3 +185,41 @@ impl Buffer for ByteBuffer<'_> {
         self.cur
     }
 }
+
+/// Iterator that generates chunks of a given length from a start index
+/// until the end of the total length.
+///
+/// The iterator returns a tuple of (start, len) for each chunk.
+pub struct Chunks {
+    start: usize,
+    end: usize,
+    cur: usize,
+    chunk_len: usize,
+}
+
+impl Chunks {
+    pub const fn new(start: usize, total_len: usize, chunk_len: usize) -> Self {
+        Self {
+            start,
+            end: start + total_len,
+            cur: start,
+            chunk_len,
+        }
+    }
+}
+
+impl Iterator for Chunks {
+    type Item = (usize, usize);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.cur >= self.end {
+            return None;
+        }
+
+        let start = self.cur;
+        let len = cmp::min(self.chunk_len, self.end - start);
+
+        self.cur += self.chunk_len;
+        Some((start, len))
+    }
+}
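
The Chunks iterator added above yields (start, len) pairs until the total length is exhausted, truncating the final chunk. A short usage sketch; the values follow directly from the definition:

    use crate::io::Chunks;

    fn chunks_example() {
        // Walk 10 bytes starting at offset 3 in chunks of at most 4 bytes.
        let mut it = Chunks::new(3, 10, 4);
        assert_eq!(it.next(), Some((3, 4)));
        assert_eq!(it.next(), Some((7, 4)));
        assert_eq!(it.next(), Some((11, 2)));
        assert_eq!(it.next(), None);
    }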

+ 13 - 16
src/kernel/block.rs

@@ -1,19 +1,18 @@
-use core::cmp::Ordering;
-
+use super::{
+    constants::ENOENT,
+    mem::{paging::Page, AsMemoryBlock as _},
+    vfs::DevId,
+};
 use crate::{
     io::{Buffer, FillResult, UninitBuffer},
     prelude::*,
 };
-
 use alloc::{
     collections::btree_map::{BTreeMap, Entry},
     sync::Arc,
 };
-use bindings::{EEXIST, EINVAL, EIO, ENOENT};
-
-use lazy_static::lazy_static;
-
-use super::{mem::paging::Page, vfs::DevId};
+use bindings::{EEXIST, EINVAL, EIO};
+use core::cmp::Ordering;
 
 pub fn make_device(major: u32, minor: u32) -> DevId {
     (major << 8) & 0xff00u32 | minor & 0xffu32
@@ -72,10 +71,7 @@ impl Ord for BlockDevice {
     }
 }
 
-lazy_static! {
-    static ref BLOCK_DEVICE_LIST: Spin<BTreeMap<DevId, Arc<BlockDevice>>> =
-        Spin::new(BTreeMap::new());
-}
+static BLOCK_DEVICE_LIST: Spin<BTreeMap<DevId, Arc<BlockDevice>>> = Spin::new(BTreeMap::new());
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
@@ -226,14 +222,14 @@ impl BlockDevice {
                 count if count <= 8 => {
                     nread = count;
 
-                    let _page = Page::alloc_one();
+                    let _page = Page::alloc();
                     page = Some(_page);
                     pages = core::slice::from_ref(page.as_ref().unwrap());
                 }
                 count if count <= 16 => {
                     nread = count;
 
-                    let _pages = Page::alloc_many(1);
+                    let _pages = Page::alloc_order(1);
                     page = Some(_pages);
                     pages = core::slice::from_ref(page.as_ref().unwrap());
                 }
@@ -243,7 +239,7 @@ impl BlockDevice {
                     let npages = (nread + 15) / 16;
                     let mut _page_vec = Vec::with_capacity(npages as usize);
                     for _ in 0..npages {
-                        _page_vec.push(Page::alloc_many(1));
+                        _page_vec.push(Page::alloc_order(1));
                     }
                     page_vec = Some(_page_vec);
                     pages = page_vec.as_ref().unwrap().as_slice();
@@ -259,7 +255,8 @@ impl BlockDevice {
             self.read_raw(req)?;
 
             for page in pages.iter() {
-                let data = &page.as_slice()[first_sector_offset as usize..];
+                // SAFETY: We are the only owner of the page so no one could be mutating it.
+                let data = unsafe { &page.as_memblk().as_bytes()[first_sector_offset as usize..] };
                 first_sector_offset = 0;
 
                 match buffer.fill(data)? {
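
Page allocation in the block layer moves from alloc_one/alloc_many to alloc/alloc_order, and raw page contents are reached through AsMemoryBlock. A minimal sketch under those names, mirroring the SAFETY argument used above; the function itself is illustrative:

    use crate::kernel::mem::{paging::Page, AsMemoryBlock as _};

    fn page_len_example() -> usize {
        // `Page::alloc()` replaces `Page::alloc_one()`; an order-1 (two-page)
        // block now comes from `Page::alloc_order(1)` instead of `alloc_many(1)`.
        let page = Page::alloc();

        // SAFETY: we are the only owner of this freshly allocated page,
        // so nothing can be mutating it while we hold this shared view.
        let data = unsafe { &page.as_memblk().as_bytes()[..] };
        data.len()
    }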

+ 13 - 11
src/kernel/chardev.rs

@@ -9,13 +9,14 @@ use super::{
         DevId,
     },
 };
-use crate::{io::Buffer, prelude::*, sync::AsRefPosition as _};
+use crate::{io::Buffer, prelude::*};
 use alloc::{
     boxed::Box,
     collections::btree_map::{BTreeMap, Entry},
     sync::Arc,
 };
-use lazy_static::lazy_static;
+use eonix_runtime::task::Task;
+use eonix_sync::AsProof as _;
 
 pub trait VirtualCharDevice: Send + Sync {
     fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize>;
@@ -33,15 +34,12 @@ pub struct CharDevice {
     device: CharDeviceType,
 }
 
-lazy_static! {
-    pub static ref CHAR_DEVICES: Spin<BTreeMap<DevId, Arc<CharDevice>>> =
-        Spin::new(BTreeMap::new());
-}
+static CHAR_DEVICES: Spin<BTreeMap<DevId, Arc<CharDevice>>> = Spin::new(BTreeMap::new());
 
 impl CharDevice {
     pub fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
         match &self.device {
-            CharDeviceType::Terminal(terminal) => terminal.read(buffer),
+            CharDeviceType::Terminal(terminal) => Task::block_on(terminal.read(buffer)),
             CharDeviceType::Virtual(device) => device.read(buffer),
         }
     }
@@ -75,13 +73,17 @@ impl CharDevice {
     pub fn open(self: &Arc<Self>) -> KResult<Arc<File>> {
         Ok(match &self.device {
             CharDeviceType::Terminal(terminal) => {
-                let procs = ProcessList::get().lock_shared();
+                let procs = Task::block_on(ProcessList::get().read());
                 let current = Thread::current();
-                let session = current.process.session(procs.as_pos());
+                let session = current.process.session(procs.prove());
                 // We only set the control terminal if the process is the session leader.
                 if session.sid == Thread::current().process.pid {
                     // Silently fail if we can't set the control terminal.
-                    dont_check!(session.set_control_terminal(&terminal, false, procs.as_pos()));
+                    dont_check!(Task::block_on(session.set_control_terminal(
+                        &terminal,
+                        false,
+                        procs.prove()
+                    )));
                 }
 
                 TerminalFile::new(terminal.clone())
@@ -119,7 +121,7 @@ struct ConsoleDevice;
 impl VirtualCharDevice for ConsoleDevice {
     fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
         let console_terminal = get_console().ok_or(EIO)?;
-        console_terminal.read(buffer)
+        Task::block_on(console_terminal.read(buffer))
     }
 
     fn write(&self, data: &[u8]) -> KResult<usize> {

+ 1 - 4
src/kernel/console.rs

@@ -1,10 +1,7 @@
 use crate::prelude::*;
 use alloc::sync::Arc;
-use lazy_static::lazy_static;
 
-lazy_static! {
-    pub static ref CONSOLE: Spin<Option<Arc<Terminal>>> = Spin::new(None);
-}
+static CONSOLE: Spin<Option<Arc<Terminal>>> = Spin::new(None);
 
 pub fn set_console(terminal: Arc<Terminal>) -> KResult<()> {
     let mut console = CONSOLE.lock();

+ 2 - 3
src/kernel/constants.rs

@@ -13,9 +13,6 @@ pub const SIG_BLOCK: u32 = 0;
 pub const SIG_UNBLOCK: u32 = 1;
 pub const SIG_SETMASK: u32 = 2;
 
-pub const SA_SIGINFO: u32 = 4;
-pub const SA_RESTORER: u32 = 0x04000000;
-
 pub const CLOCK_REALTIME: u32 = 0;
 pub const CLOCK_MONOTONIC: u32 = 1;
 
@@ -23,7 +20,9 @@ pub const ENOENT: u32 = 2;
 pub const EIO: u32 = 5;
 pub const ENXIO: u32 = 6;
 pub const ENOEXEC: u32 = 8;
+pub const EFAULT: u32 = 14;
 pub const EEXIST: u32 = 17;
+pub const EINVAL: u32 = 22;
 pub const ENOSYS: u32 = 38;
 
 #[allow(dead_code)]

Some files were not shown because too many files changed in this diff