Преглед на файлове

Merge pull request #18 from greatbridf/dev-drivers

Add AHCI port write command and enhance drivers
greatbridf преди 7 месеца
родител
ревизия
481948d2cb

+ 1 - 27
Cargo.lock

@@ -106,8 +106,7 @@ dependencies = [
  "eonix_sync_base",
  "fdt",
  "intrusive_list",
- "riscv 0.13.0",
- "riscv-peripheral",
+ "riscv",
  "sbi",
 ]
 
@@ -339,19 +338,6 @@ dependencies = [
  "riscv-pac",
 ]
 
-[[package]]
-name = "riscv"
-version = "0.14.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f1671c79a01a149fe000af2429ce9ccc8e58cdecda72672355d50e5536b363c"
-dependencies = [
- "critical-section",
- "embedded-hal",
- "paste",
- "riscv-macros",
- "riscv-pac",
-]
-
 [[package]]
 name = "riscv-macros"
 version = "0.2.0"
@@ -369,18 +355,6 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
 
-[[package]]
-name = "riscv-peripheral"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d50c11ddf3e2a21206642bfe6d5e06f76d5225c1fbece09952dcd40f8c49409a"
-dependencies = [
- "embedded-hal",
- "paste",
- "riscv 0.14.0",
- "riscv-pac",
-]
-
 [[package]]
 name = "safe-mmio"
 version = "0.2.5"

+ 0 - 1
crates/eonix_hal/Cargo.toml

@@ -21,6 +21,5 @@ intrusive_list = { path = "../intrusive_list" }
 buddy_allocator = { path = "../buddy_allocator" }
 sbi = "0.3.0"
 riscv = { version = "0.13.0", features = ["s-mode"] }
-riscv-peripheral = { version = "0.3.0" }
 fdt = "0.1"
 bitflags = "2.6.0"

+ 11 - 4
crates/eonix_hal/eonix_hal_traits/src/trap.rs

@@ -8,9 +8,12 @@ use eonix_mm::address::VAddr;
 /// and will be used in the HAL crates.
 #[doc(notable_trait)]
 pub trait RawTrapContext: Copy {
+    type FIrq: FnOnce(fn(irqno: usize));
+    type FTimer: FnOnce(fn());
+
     fn new() -> Self;
 
-    fn trap_type(&self) -> TrapType;
+    fn trap_type(&self) -> TrapType<Self::FIrq, Self::FTimer>;
 
     fn get_program_counter(&self) -> usize;
     fn get_stack_pointer(&self) -> usize;
@@ -56,11 +59,15 @@ pub trait IrqState {
 }
 
 /// The reason that caused the trap.
-pub enum TrapType {
+///
+/// `FIrq` and `FTimer` are the architecture-specific callbacks carried by the
+/// interrupt variants. Each can be called at most once (`FnOnce`) and is
+/// handed the handler that should service the interrupt.
+pub enum TrapType<FIrq: FnOnce(fn(irqno: usize)), FTimer: FnOnce(fn())> {
     Syscall { no: usize, args: [usize; 6] },
     Fault(Fault),
-    Irq(usize),
-    Timer,
+    /// External interrupt; `callback` receives a handler taking the IRQ number.
+    Irq { callback: FIrq },
+    /// Timer interrupt; `callback` receives the timer handler.
+    Timer { callback: FTimer },
 }
 
 /// A marker type that indicates that the type is a raw trap context.

+ 6 - 3
crates/eonix_hal/src/arch/riscv64/bootstrap.rs

@@ -1,6 +1,8 @@
 use super::{
     config::{self, mm::*},
     console::write_str,
+    cpu::CPUID,
+    time::set_next_timer,
     trap::TRAP_SCRATCH,
 };
 use crate::{
@@ -8,7 +10,6 @@ use crate::{
         cpu::CPU,
         fdt::{init_dtb_and_fdt, FdtExt},
         mm::{ArchPhysAccess, FreeRam, PageAttribute64, GLOBAL_PAGE_TABLE},
-        interrupt::enable_timer_interrupt,
     },
     bootstrap::BootStrapData,
     mm::{ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef, ScopedAllocator},
@@ -209,9 +210,11 @@ fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) {
         }
     });
 
+    CPUID.set(hart_id);
+
     let mut cpu = CPU::local();
     unsafe {
-        cpu.as_mut().init(hart_id);
+        cpu.as_mut().init();
     }
 
     percpu_area.register(cpu.cpuid());
@@ -224,7 +227,7 @@ fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) {
     }
 
     // set current hart's mtimecmp register
-    enable_timer_interrupt();
+    set_next_timer();
 }
 
 /// TODO

+ 11 - 22
crates/eonix_hal/src/arch/riscv64/cpu.rs

@@ -2,10 +2,7 @@ use super::{
     interrupt::InterruptControl,
     trap::{setup_trap, TRAP_SCRATCH},
 };
-use crate::arch::{
-    fdt::{FdtExt, FDT},
-    time::set_next_timer,
-};
+use crate::arch::fdt::{FdtExt, FDT};
 use core::{arch::asm, pin::Pin, ptr::NonNull};
 use eonix_preempt::PreemptGuard;
 use eonix_sync_base::LazyLock;
@@ -13,11 +10,13 @@ use riscv::register::{
     medeleg::{self, Medeleg},
     mhartid, sscratch, sstatus,
 };
-use riscv_peripheral::plic::PLIC;
 use sbi::PhysicalAddress;
 
 #[eonix_percpu::define_percpu]
-static LOCAL_CPU: LazyLock<CPU> = LazyLock::new(CPU::new);
+pub static CPUID: usize = 0;
+
+#[eonix_percpu::define_percpu]
+static LOCAL_CPU: LazyLock<CPU> = LazyLock::new(|| CPU::new(CPUID.get()));
 
 #[derive(Debug, Clone)]
 pub enum UserTLS {
@@ -26,8 +25,7 @@ pub enum UserTLS {
 
 /// RISC-V Hart
 pub struct CPU {
-    hart_id: usize,
-    interrupt: InterruptControl,
+    pub(crate) interrupt: InterruptControl,
 }
 
 impl UserTLS {
@@ -37,10 +35,9 @@ impl UserTLS {
 }
 
 impl CPU {
-    pub fn new() -> Self {
+    fn new(cpuid: usize) -> Self {
         Self {
-            hart_id: 0,
-            interrupt: InterruptControl::new(),
+            interrupt: InterruptControl::new(cpuid),
         }
     }
 
@@ -49,10 +46,8 @@ impl CPU {
     /// # Safety
     /// This function performs low-level hardware initialization and should
     /// only be called once per Hart during its boot sequence.
-    pub unsafe fn init(mut self: Pin<&mut Self>, hart_id: usize) {
+    pub unsafe fn init(mut self: Pin<&mut Self>) {
         let me = self.as_mut().get_unchecked_mut();
-        me.hart_id = hart_id;
-
         setup_trap();
 
         let interrupt = self.map_unchecked_mut(|me| &mut me.interrupt);
@@ -65,7 +60,7 @@ impl CPU {
     /// Boot all other hart.
     pub unsafe fn bootstrap_cpus(&self) {
         let total_harts = FDT.hart_count();
-        for i in (0..total_harts).filter(|&i| i != self.hart_id) {
+        for i in (0..total_harts).filter(|&i| i != self.cpuid()) {
             sbi::hsm::hart_start(i, todo!("AP entry"), 0)
                 .expect("Failed to start secondary hart via SBI");
         }
@@ -81,12 +76,6 @@ impl CPU {
         // nothing
     }
 
-    pub fn end_of_interrupt(self: Pin<&mut Self>) {
-        // TODO: only timer interrupt should do this, here may need to change 
-        // if some other interrupt need send end signal
-        set_next_timer();
-    }
-
     pub fn local() -> PreemptGuard<Pin<&'static mut Self>> {
         unsafe {
             // SAFETY: We pass the reference into a `PreemptGuard`, which ensures
@@ -96,7 +85,7 @@ impl CPU {
     }
 
     pub fn cpuid(&self) -> usize {
-        self.hart_id
+        CPUID.get()
     }
 }
 

+ 131 - 27
crates/eonix_hal/src/arch/riscv64/interrupt/mod.rs

@@ -1,48 +1,152 @@
-use core::pin::Pin;
-
+use super::{config::platform::virt::*, fdt::FDT, fence::memory_barrier, mm::ArchPhysAccess};
 use crate::arch::time;
-
-use super::config::platform::virt::*;
+use core::{pin::Pin, ptr::NonNull};
+use eonix_mm::address::{PAddr, PhysAccess};
+use eonix_sync_base::LazyLock;
 use riscv::register::sie;
-use riscv_peripheral::{
-    aclint::{Clint, CLINT},
-    plic::{Plic, PLIC},
-};
 use sbi::SbiError;
 
-#[derive(Clone, Copy)]
-struct ArchPlic;
+const PRIORITY_OFFSET: usize = 0x0;
+const PENDING_OFFSET: usize = 0x1000;
 
-#[derive(Clone, Copy)]
-struct ArchClint;
+const ENABLE_OFFSET: usize = 0x2000;
+const THRESHOLD_OFFSET: usize = 0x200000;
+const CLAIM_COMPLETE_OFFSET: usize = 0x200004;
 
-unsafe impl Plic for ArchPlic {
-    const BASE: usize = PLIC_BASE;
-}
+const ENABLE_STRIDE: usize = 0x80;
+const CONTEXT_STRIDE: usize = 0x1000;
+
+/// Physical base address of the PLIC, resolved lazily from the device tree.
+///
+/// Uses the first `reg` entry of a node compatible with `riscv,plic0` or
+/// `riscv,plic1`, and panics when the node or the entry is missing.
+static PLIC_BASE: LazyLock<PAddr> = LazyLock::new(|| {
+    let first_region = FDT
+        .find_compatible(&["riscv,plic0", "riscv,plic1"])
+        .expect("Failed to find PLIC in FDT")
+        .reg()
+        .expect("PLIC node has no reg property")
+        .next()
+        .expect("PLIC reg property is empty");
+
+    PAddr::from(first_region.starting_address as usize)
+});
 
-unsafe impl Clint for ArchClint {
-    const BASE: usize = CLINT_BASE;
-    const MTIME_FREQ: usize = CPU_FREQ_HZ as usize;
+pub struct PLIC {
+    enable: NonNull<u32>,
+    threshold: NonNull<u32>,
+    claim_complete: NonNull<u32>,
 }
 
-/// Architecture-specific interrupt control block.
 pub struct InterruptControl {
-    clint: CLINT<ArchClint>,
+    pub plic: PLIC,
+}
+
+impl PLIC {
+    /// Creates the register view used by hart `cpuid`.
+    ///
+    /// All three pointers target PLIC context `cpuid * 2 + 1`.
+    /// NOTE(review): presumably the hart's supervisor-mode context -- confirm
+    /// against the platform's PLIC context layout.
+    fn new(cpuid: usize) -> Self {
+        let base = *PLIC_BASE.get();
+        let context = cpuid * 2 + 1;
+
+        let enable = PAddr::from(base + context * ENABLE_STRIDE + ENABLE_OFFSET);
+        let threshold = PAddr::from(base + context * CONTEXT_STRIDE + THRESHOLD_OFFSET);
+        let claim_complete =
+            PAddr::from(base + context * CONTEXT_STRIDE + CLAIM_COMPLETE_OFFSET);
+
+        unsafe {
+            // SAFETY: The PLIC registers are memory-mapped and placed at specific addresses.
+            //         We are pretty sure that the addresses are valid.
+            Self {
+                enable: ArchPhysAccess::as_ptr(enable),
+                threshold: ArchPhysAccess::as_ptr(threshold),
+                claim_complete: ArchPhysAccess::as_ptr(claim_complete),
+            }
+        }
+    }
+
+    /// Writes `threshold` to this context's priority threshold register.
+    pub fn set_threshold(&self, threshold: u32) {
+        unsafe {
+            self.threshold.write_volatile(threshold);
+        }
+    }
+
+    /// Writes `priority` to the priority register of source `interrupt`.
+    ///
+    /// The register lives at `PLIC_BASE + PRIORITY_OFFSET + interrupt * 4`
+    /// and is shared by all contexts. The write is bracketed by memory
+    /// barriers.
+    pub fn set_priority(&self, interrupt: usize, priority: u32) {
+        let priority_ptr = unsafe {
+            // SAFETY: The PLIC priority register is memory-mapped and placed at a specific address.
+            //         We are pretty sure that the address is valid.
+            ArchPhysAccess::as_ptr(
+                *PLIC_BASE.get() + PRIORITY_OFFSET + interrupt * size_of::<u32>(),
+            )
+        };
+
+        memory_barrier();
+
+        unsafe {
+            priority_ptr.write_volatile(priority);
+        }
+
+        memory_barrier();
+    }
+
+    /// Reads the claim/complete register.
+    ///
+    /// Returns `None` when the register reads 0, otherwise the value read as
+    /// the claimed interrupt number.
+    pub fn claim_interrupt(&self) -> Option<usize> {
+        match unsafe { self.claim_complete.read_volatile() } {
+            0 => None,
+            interrupt => Some(interrupt as usize),
+        }
+    }
+
+    /// Writes `interrupt` back to the claim/complete register.
+    pub fn complete_interrupt(&self, interrupt: usize) {
+        unsafe {
+            self.claim_complete.write_volatile(interrupt as u32);
+        }
+    }
+
+    /// Sets the enable bit of source `interrupt` for this context.
+    ///
+    /// The bit is `interrupt % 32` of the 32-bit word at index
+    /// `interrupt / 32`; the word is updated with a read-modify-write.
+    pub fn enable_interrupt(&self, interrupt: usize) {
+        debug_assert!(interrupt < 1024, "Interrupt number out of range");
+
+        let enable_ptr = unsafe {
+            // SAFETY: Interrupt number is guaranteed to be less than 1024,
+            //         so we won't overflow the enable register array.
+            self.enable.add(interrupt / 32)
+        };
+
+        let bit = 1 << (interrupt % 32);
+        unsafe {
+            enable_ptr.write_volatile(enable_ptr.read_volatile() | bit);
+        }
+    }
+
+    /// Clears the enable bit of source `interrupt` for this context.
+    ///
+    /// Mirror image of [`PLIC::enable_interrupt`], including the range check
+    /// that the pointer arithmetic below relies on.
+    pub fn disable_interrupt(&self, interrupt: usize) {
+        debug_assert!(interrupt < 1024, "Interrupt number out of range");
+
+        let enable_ptr = unsafe {
+            // SAFETY: Interrupt number is guaranteed to be less than 1024,
+            //         so we won't overflow the enable register array.
+            self.enable.add(interrupt / 32)
+        };
+
+        let bit = 1 << (interrupt % 32);
+        unsafe {
+            enable_ptr.write_volatile(enable_ptr.read_volatile() & !bit);
+        }
+    }
 }
 
 impl InterruptControl {
     /// # Safety
     /// should be called only once.
-    pub(crate) fn new() -> Self {
+    pub(crate) fn new(cpuid: usize) -> Self {
         Self {
-            clint: CLINT::new(),
+            plic: PLIC::new(cpuid),
         }
     }
 
-    pub fn init(self: Pin<&mut Self>) {}
-}
+    pub fn init(self: Pin<&mut Self>) {
+        self.plic.set_threshold(0);
+
+        // TODO: We should enable interrupts only when we register a handler.
+        for i in 0..32 {
+            self.plic.set_priority(i, 1);
+            self.plic.enable_interrupt(i);
+        }
 
-pub fn enable_timer_interrupt() {
-    unsafe { sie::set_stimer() };
-    time::set_next_timer();
+        unsafe {
+            sie::set_stimer();
+            sie::set_sext();
+        }
+    }
 }

+ 1 - 4
crates/eonix_hal/src/arch/riscv64/trap/mod.rs

@@ -15,10 +15,7 @@ use riscv::register::stvec::TrapMode;
 use riscv::register::{scause, sepc, stval};
 use riscv::{
     asm::sfence_vma_all,
-    register::{
-        sie,
-        stvec::{self, Stvec},
-    },
+    register::stvec::{self, Stvec},
 };
 use sbi::SbiError;
 

+ 23 - 4
crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs

@@ -1,3 +1,4 @@
+use crate::{arch::time::set_next_timer, processor::CPU};
 use core::arch::asm;
 use eonix_hal_traits::{
     fault::{Fault, PageFaultErrorCode},
@@ -117,6 +118,9 @@ impl TrapContext {
 }
 
 impl RawTrapContext for TrapContext {
+    type FIrq = fn(handler: fn(irqno: usize));
+    type FTimer = fn(handler: fn());
+
     fn new() -> Self {
         let mut sstatus = Sstatus::from_bits(0);
         sstatus.set_fs(FS::Initial);
@@ -130,14 +134,29 @@ impl RawTrapContext for TrapContext {
         }
     }
 
-    fn trap_type(&self) -> TrapType {
+    fn trap_type(&self) -> TrapType<Self::FIrq, Self::FTimer> {
         let cause = self.scause.cause();
         match cause {
             Trap::Interrupt(i) => {
                 match Interrupt::from_number(i).unwrap() {
-                    Interrupt::SupervisorTimer => TrapType::Timer,
-                    // TODO: need to read plic
-                    Interrupt::SupervisorExternal => TrapType::Irq(0),
+                    Interrupt::SupervisorTimer => TrapType::Timer {
+                        callback: |handler| {
+                            set_next_timer();
+                            handler();
+                        },
+                    },
+                    Interrupt::SupervisorExternal => TrapType::Irq {
+                        callback: |handler| {
+                            let mut cpu = CPU::local();
+                            match cpu.as_mut().interrupt.plic.claim_interrupt() {
+                                None => {}
+                                Some(irqno) => {
+                                    cpu.interrupt.plic.complete_interrupt(irqno);
+                                    handler(irqno);
+                                }
+                            }
+                        },
+                    },
                     // soft interrupt
                     _ => TrapType::Fault(Fault::Unknown(i)),
                 }

+ 50 - 5
crates/eonix_hal/src/arch/x86_64/interrupt.rs

@@ -146,12 +146,57 @@ impl InterruptControl {
     pub fn setup_timer(&self) {
         self.apic_base.task_priority().write(0);
         self.apic_base.timer_divide().write(0x3); // Divide by 16
-        self.apic_base.timer_register().write(0x20040);
+        self.apic_base.timer_register().write(0x0040);
+
+        // Setup the PIT to generate interrupts at 100Hz.
+        unsafe {
+            asm!(
+                "in $0x61, %al",
+                "and $0xfd, %al",
+                "or $0x1, %al",
+                "out %al, %dx",
+                "mov $0xb2, %al",
+                "out %al, $0x43",
+                "mov $0x9b, %al",
+                "out %al, $0x42",
+                "in $0x60, %al",
+                "mov $0x2e, %al",
+                "out %al, $0x42",
+                "in $0x61, %al",
+                "and $0xfe, %al",
+                "out %al, $0x61",
+                "or $0x1, %al",
+                "out %al, $0x61",
+                out("eax") _,
+                out("edx") _,
+                options(att_syntax, nomem, nostack, preserves_flags),
+            );
+        }
+
+        self.apic_base.timer_initial_count().write(u32::MAX);
+
+        unsafe {
+            asm!(
+                "2:",
+                "in $0x61, %al",
+                "and $0x20, %al",
+                "jz 2b",
+                out("ax") _,
+                options(att_syntax, nomem, nostack, preserves_flags),
+            )
+        }
+
+        self.apic_base.timer_register().write(0x10000);
 
-        // TODO: Get the bus frequency from...?
-        let freq = 200;
-        let count = freq * 1_000_000 / 16 / 100;
-        self.apic_base.timer_initial_count().write(count as u32);
+        let counts = self.apic_base.timer_current_count().read();
+        let freq = (u32::MAX - counts) as u64 * 16 * 100;
+
+        self.apic_base
+            .timer_initial_count()
+            .write((freq / 16 / 1_000) as u32);
+
+        self.apic_base.timer_register().write(0x20040);
+        self.apic_base.timer_divide().write(0x3); // Divide by 16
     }
 
     pub fn setup_idt(self: Pin<&mut Self>) {

+ 1 - 0
crates/eonix_hal/src/arch/x86_64/io.rs

@@ -1,5 +1,6 @@
 use core::arch::asm;
 
+#[derive(Clone, Copy)]
 pub struct Port8 {
     no: u16,
 }

+ 28 - 3
crates/eonix_hal/src/arch/x86_64/trap/trap_context.rs

@@ -1,3 +1,4 @@
+use crate::processor::CPU;
 use core::arch::asm;
 use eonix_hal_traits::{
     fault::{Fault, PageFaultErrorCode},
@@ -77,16 +78,24 @@ impl TrapContext {
 }
 
 impl RawTrapContext for TrapContext {
+    type FIrq = impl FnOnce(fn(irqno: usize));
+    type FTimer = fn(handler: fn());
+
     fn new() -> Self {
         Self {
             ..Default::default()
         }
     }
 
-    fn trap_type(&self) -> TrapType {
+    fn trap_type(&self) -> TrapType<Self::FIrq, Self::FTimer> {
         match self.int_no {
             0..0x20 => TrapType::Fault(self.get_fault_type()),
-            0x40 => TrapType::Timer,
+            0x40 => TrapType::Timer {
+                callback: |handler| {
+                    CPU::local().as_mut().end_of_interrupt();
+                    handler();
+                },
+            },
             0x80 => TrapType::Syscall {
                 no: self.rax as usize,
                 args: [
@@ -98,7 +107,23 @@ impl RawTrapContext for TrapContext {
                     self.rbp as usize,
                 ],
             },
-            no => TrapType::Irq(no as usize - 0x20),
+            no => TrapType::Irq {
+                callback: move |handler| {
+                    let irqno = no as usize - 0x20;
+
+                    use crate::arch::io::Port8;
+
+                    const PIC1_COMMAND: Port8 = Port8::new(0x20);
+                    const PIC2_COMMAND: Port8 = Port8::new(0xA0);
+
+                    PIC1_COMMAND.write(0x20); // EOI
+                    if irqno >= 8 {
+                        PIC2_COMMAND.write(0x20); // EOI
+                    }
+
+                    handler(irqno);
+                },
+            },
         }
     }
 

+ 1 - 0
crates/eonix_hal/src/lib.rs

@@ -1,6 +1,7 @@
 #![no_std]
 #![feature(allocator_api)]
 #![feature(doc_notable_trait)]
+#![feature(impl_trait_in_assoc_type)]
 
 pub(crate) mod arch;
 

+ 0 - 1
src/driver.rs

@@ -1,6 +1,5 @@
 pub mod ahci;
 pub mod e1000e;
-#[cfg(target_arch = "x86_64")]
 pub mod serial;
 
 #[cfg(target_arch = "riscv64")]

+ 43 - 0
src/driver/ahci/command.rs

@@ -89,3 +89,46 @@ impl Command for ReadLBACommand<'_> {
         false
     }
 }
+
+/// Command that writes `count` 512-byte units starting at `lba`, taking the
+/// data from `pages`.
+pub struct WriteLBACommand<'a> {
+    pages: &'a [Page],
+    lba: u64,
+    count: u16,
+}
+
+impl<'a> WriteLBACommand<'a> {
+    /// Validates the arguments and builds the command.
+    ///
+    /// Returns `EINVAL` when more than 248 pages are supplied, or when the
+    /// pages together hold fewer than `count * 512` bytes.
+    pub fn new(pages: &'a [Page], lba: u64, count: u16) -> KResult<Self> {
+        if pages.len() > 248 {
+            return Err(EINVAL);
+        }
+
+        let required_len = usize::from(count) * 512;
+        let available_len: usize = pages.iter().map(|page| page.len()).sum();
+        if available_len < required_len {
+            return Err(EINVAL);
+        }
+
+        Ok(Self { pages, lba, count })
+    }
+}
+
+impl Command for WriteLBACommand<'_> {
+    /// Pages holding the data to be written.
+    fn pages(&self) -> &[Page] {
+        self.pages
+    }
+
+    /// Starting logical block address.
+    fn lba(&self) -> u64 {
+        self.lba
+    }
+
+    /// Number of 512-byte units to write.
+    fn count(&self) -> u16 {
+        self.count
+    }
+
+    /// Command opcode (0xCA).
+    fn cmd(&self) -> u8 {
+        0xCA
+    }
+
+    /// This command transfers data to the device.
+    fn write(&self) -> bool {
+        true
+    }
+}

+ 2 - 1
src/driver/ahci/mod.rs

@@ -13,6 +13,7 @@ use alloc::{format, sync::Arc};
 use control::AdapterControl;
 use defs::*;
 use eonix_mm::address::{AddrOps as _, PAddr};
+use eonix_runtime::task::Task;
 use eonix_sync::SpinIrq as _;
 use port::AdapterPort;
 
@@ -132,7 +133,7 @@ impl Device<'static> {
                     port,
                 )?;
 
-                port.partprobe()?;
+                Task::block_on(port.partprobe())?;
 
                 Ok(())
             })() {

+ 28 - 8
src/driver/ahci/port.rs

@@ -1,4 +1,4 @@
-use super::command::{Command, IdentifyCommand, ReadLBACommand};
+use super::command::{Command, IdentifyCommand, ReadLBACommand, WriteLBACommand};
 use super::slot::CommandSlot;
 use super::stats::AdapterPortStats;
 use super::{
@@ -275,13 +275,33 @@ impl BlockRequestQueue for AdapterPort<'_> {
     }
 
     fn submit(&self, req: BlockDeviceRequest) -> KResult<()> {
-        // TODO: check disk size limit using newtype
-        if req.count > 65535 {
-            return Err(EINVAL);
+        // Each request kind is turned into its command and sent right away.
+        // A count that does not fit the command's 16-bit field is rejected
+        // with `EINVAL` before the command is built.
+        match req {
+            BlockDeviceRequest::Read {
+                sector,
+                count,
+                buffer,
+            } => {
+                if count > 65535 {
+                    return Err(EINVAL);
+                }
+
+                self.send_command(&ReadLBACommand::new(buffer, sector, count as u16)?)
+            }
+            BlockDeviceRequest::Write {
+                sector,
+                count,
+                buffer,
+            } => {
+                if count > 65535 {
+                    return Err(EINVAL);
+                }
+
+                self.send_command(&WriteLBACommand::new(buffer, sector, count as u16)?)
+            }
         }
-
-        let command = ReadLBACommand::new(req.buffer, req.sector, req.count as u16)?;
-
-        self.send_command(&command)
     }
 }

+ 474 - 431
src/driver/e1000e.rs

@@ -1,435 +1,478 @@
-// use crate::prelude::*;
-//
-// use crate::kernel::interrupt::register_irq_handler;
-// use crate::kernel::mem::{paging, phys};
-// use crate::net::netdev;
-// use alloc::boxed::Box;
-// use alloc::vec::Vec;
-// use paging::Page;
-// use phys::{NoCachePP, PhysPtr};
-//
-// mod defs;
-//
-// #[repr(C)]
-// struct RxDescriptor {
-//     buffer: u64,
-//     length: u16,
-//     checksum: u16,
-//     status: u8,
-//     errors: u8,
-//     vlan: u16,
-// }
-//
-// #[repr(C)]
-// struct TxDescriptor {
-//     buffer: u64,
-//     length: u16,
-//     cso: u8, // Checksum offset
-//     cmd: u8,
-//     status: u8,
-//     css: u8, // Checksum start
-//     vlan: u16,
-// }
-//
-// const RX_DESC_SIZE: usize = 32;
-// const TX_DESC_SIZE: usize = 32;
-//
-// struct E1000eDev {
-//     mac: netdev::Mac,
-//     status: netdev::LinkStatus,
-//     speed: netdev::LinkSpeed,
-//     id: u32,
-//
-//     base: NoCachePP,
-//     rt_desc_page: Page,
-//     rx_head: Option<u32>,
-//     rx_tail: Option<u32>,
-//     tx_tail: Option<u32>,
-//
-//     rx_buffers: Option<Box<Vec<Page>>>,
-//     tx_buffers: Option<Box<Vec<Page>>>,
-// }
-//
-// fn test(val: u32, bit: u32) -> bool {
-//     (val & bit) == bit
-// }
-//
-// struct PrintableBytes<'a>(&'a [u8]);
-//
-// impl core::fmt::Debug for PrintableBytes<'_> {
-//     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-//         write!(f, "PrintableBytes {{")?;
-//         for chunk in self.0.chunks(16) {
-//             for &byte in chunk {
-//                 write!(f, "{byte} ")?;
-//             }
-//             write!(f, "\n")?;
-//         }
-//         write!(f, "}}")?;
-//
-//         Ok(())
-//     }
-// }
-//
-// impl netdev::Netdev for E1000eDev {
-//     fn mac(&self) -> netdev::Mac {
-//         self.mac
-//     }
-//
-//     fn link_status(&self) -> netdev::LinkStatus {
-//         self.status
-//     }
-//
-//     fn link_speed(&self) -> netdev::LinkSpeed {
-//         self.speed
-//     }
-//
-//     fn id(&self) -> u32 {
-//         self.id
-//     }
-//
-//     fn up(&mut self) -> Result<(), u32> {
-//         let ctrl = self.read(defs::REG_CTRL);
-//         let status = self.read(defs::REG_STAT);
-//
-//         // check link up
-//         if !test(ctrl, defs::CTRL_SLU) || !test(status, defs::STAT_LU) {
-//             return Err(EIO);
-//         }
-//
-//         // auto negotiation of speed
-//         match status & defs::STAT_SPEED_MASK {
-//             defs::STAT_SPEED_10M => self.speed = netdev::LinkSpeed::Speed10M,
-//             defs::STAT_SPEED_100M => self.speed = netdev::LinkSpeed::Speed100M,
-//             defs::STAT_SPEED_1000M => self.speed = netdev::LinkSpeed::Speed1000M,
-//             _ => return Err(EINVAL),
-//         }
-//
-//         // clear multicast table
-//         for i in (0..128).step_by(4) {
-//             self.write(defs::REG_MTA + i, 0);
-//         }
-//
-//         self.clear_stats()?;
-//
-//         // setup interrupt handler
-//         let device = netdev::get_netdev(self.id).unwrap();
-//         let handler = move || {
-//             eonix_runtime::task::Task::block_on(device.lock())
-//                 .fire()
-//                 .unwrap();
-//         };
-//
-//         register_irq_handler(0xb, handler)?;
-//
-//         // enable interrupts
-//         self.write(defs::REG_IMS, defs::ICR_NORMAL | defs::ICR_UP);
-//
-//         // read to clear any pending interrupts
-//         self.read(defs::REG_ICR);
-//
-//         self.setup_rx()?;
-//         self.setup_tx()?;
-//
-//         self.status = netdev::LinkStatus::Up;
-//
-//         Ok(())
-//     }
-//
-//     fn fire(&mut self) -> Result<(), u32> {
-//         let cause = self.read(defs::REG_ICR);
-//         if !test(cause, defs::ICR_INT) {
-//             return Ok(());
-//         }
-//
-//         loop {
-//             let tail = self.rx_tail.ok_or(EIO)?;
-//             let next_tail = (tail + 1) % RX_DESC_SIZE as u32;
-//
-//             if next_tail == self.read(defs::REG_RDH) {
-//                 break;
-//             }
-//
-//             let ref mut desc = self.rx_desc_table()[next_tail as usize];
-//             if !test(desc.status as u32, defs::RXD_STAT_DD as u32) {
-//                 Err(EIO)?;
-//             }
-//
-//             desc.status = 0;
-//             let len = desc.length as usize;
-//
-//             let buffers = self.rx_buffers.as_mut().ok_or(EIO)?;
-//             let data = &buffers[next_tail as usize].as_slice()[..len];
-//
-//             println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data));
-//             self.rx_tail = Some(next_tail);
-//         }
-//
-//         Ok(())
-//     }
-//
-//     fn send(&mut self, buf: &[u8]) -> Result<(), u32> {
-//         let tail = self.tx_tail.ok_or(EIO)?;
-//         let head = self.read(defs::REG_TDH);
-//         let next_tail = (tail + 1) % TX_DESC_SIZE as u32;
-//
-//         if next_tail == head {
-//             return Err(EAGAIN);
-//         }
-//
-//         let ref mut desc = self.tx_desc_table()[tail as usize];
-//         if !test(desc.status as u32, defs::TXD_STAT_DD as u32) {
-//             return Err(EIO);
-//         }
-//
-//         let buffer_page = Page::alloc_one();
-//         if buf.len() > buffer_page.len() {
-//             return Err(EFAULT);
-//         }
-//         buffer_page.as_mut_slice()[..buf.len()].copy_from_slice(buf);
-//
-//         desc.buffer = buffer_page.as_phys() as u64;
-//         desc.length = buf.len() as u16;
-//         desc.cmd = defs::TXD_CMD_EOP | defs::TXD_CMD_IFCS | defs::TXD_CMD_RS;
-//         desc.status = 0;
-//
-//         self.tx_tail = Some(next_tail);
-//         self.write(defs::REG_TDT, next_tail);
-//
-//         // TODO: check if the packets are sent and update self.tx_head state
-//
-//         Ok(())
-//     }
-// }
-//
-// impl E1000eDev {
-//     fn setup_rx(&mut self) -> Result<(), u32> {
-//         if !self.rx_head.is_none() || !self.rx_tail.is_none() {
-//             return Err(EINVAL);
-//         }
-//
-//         let addr = self.rt_desc_page.as_phys();
-//
-//         self.write(defs::REG_RDBAL, addr as u32);
-//         self.write(defs::REG_RDBAH, (addr >> 32) as u32);
-//
-//         self.write(
-//             defs::REG_RDLEN,
-//             (RX_DESC_SIZE * size_of::<RxDescriptor>()) as u32,
-//         );
-//
-//         self.write(defs::REG_RDH, 0);
-//         self.write(defs::REG_RDT, RX_DESC_SIZE as u32 - 1);
-//
-//         self.rx_head = Some(0);
-//         self.rx_tail = Some(RX_DESC_SIZE as u32 - 1);
-//
-//         self.write(
-//             defs::REG_RCTL,
-//             defs::RCTL_EN
-//                 | defs::RCTL_MPE
-//                 | defs::RCTL_LPE
-//                 | defs::RCTL_LBM_NO
-//                 | defs::RCTL_DTYP_LEGACY
-//                 | defs::RCTL_BAM
-//                 | defs::RCTL_BSIZE_8192
-//                 | defs::RCTL_SECRC,
-//         );
-//
-//         Ok(())
-//     }
-//
-//     fn setup_tx(&mut self) -> Result<(), u32> {
-//         if !self.tx_tail.is_none() {
-//             return Err(EINVAL);
-//         }
-//
-//         let addr = self.rt_desc_page.as_phys() + 0x200;
-//
-//         self.write(defs::REG_TDBAL, addr as u32);
-//         self.write(defs::REG_TDBAH, (addr >> 32) as u32);
-//
-//         self.write(
-//             defs::REG_TDLEN,
-//             (TX_DESC_SIZE * size_of::<TxDescriptor>()) as u32,
-//         );
-//
-//         self.write(defs::REG_TDH, 0);
-//         self.write(defs::REG_TDT, 0);
-//
-//         self.tx_tail = Some(0);
-//
-//         self.write(
-//             defs::REG_TCTL,
-//             defs::TCTL_EN
-//                 | defs::TCTL_PSP
-//                 | (15 << defs::TCTL_CT_SHIFT)
-//                 | (64 << defs::TCTL_COLD_SHIFT)
-//                 | defs::TCTL_RTLC,
-//         );
-//
-//         Ok(())
-//     }
-//
-//     fn reset(&self) -> Result<(), u32> {
-//         // disable interrupts so we won't mess things up
-//         self.write(defs::REG_IMC, 0xffffffff);
-//
-//         let ctrl = self.read(defs::REG_CTRL);
-//         self.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD);
-//
-//         while self.read(defs::REG_STAT) & defs::STAT_GIOE != 0 {
-//             // wait for link up
-//         }
-//
-//         let ctrl = self.read(defs::REG_CTRL);
-//         self.write(defs::REG_CTRL, ctrl | defs::CTRL_RST);
-//
-//         while self.read(defs::REG_CTRL) & defs::CTRL_RST != 0 {
-//             // wait for reset
-//         }
-//
-//         // disable interrupts again
-//         self.write(defs::REG_IMC, 0xffffffff);
-//
-//         Ok(())
-//     }
-//
-//     fn clear_stats(&self) -> Result<(), u32> {
-//         self.write(defs::REG_COLC, 0);
-//         self.write(defs::REG_GPRC, 0);
-//         self.write(defs::REG_MPRC, 0);
-//         self.write(defs::REG_GPTC, 0);
-//         self.write(defs::REG_GORCL, 0);
-//         self.write(defs::REG_GORCH, 0);
-//         self.write(defs::REG_GOTCL, 0);
-//         self.write(defs::REG_GOTCH, 0);
-//         Ok(())
-//     }
-//
-//     pub fn new(base: NoCachePP) -> Result<Self, u32> {
-//         let page = Page::alloc_one();
-//
-//         page.zero();
-//
-//         let mut dev = Self {
-//             mac: [0; 6],
-//             status: netdev::LinkStatus::Down,
-//             speed: netdev::LinkSpeed::SpeedUnknown,
-//             id: netdev::alloc_id(),
-//             base,
-//             rt_desc_page: page,
-//             rx_head: None,
-//             rx_tail: None,
-//             tx_tail: None,
-//             rx_buffers: None,
-//             tx_buffers: None,
-//         };
-//
-//         dev.reset()?;
-//
-//         dev.mac = unsafe { dev.base.offset(0x5400).as_ptr::<[u8; 6]>().read() };
-//         dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE)));
-//
-//         let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE));
-//
-//         for index in 0..RX_DESC_SIZE {
-//             let page = Page::alloc_many(2);
-//
-//             let ref mut desc = dev.rx_desc_table()[index];
-//             desc.buffer = page.as_phys() as u64;
-//             desc.status = 0;
-//
-//             rx_buffers.push(page);
-//         }
-//
-//         for index in 0..TX_DESC_SIZE {
-//             let ref mut desc = dev.tx_desc_table()[index];
-//             desc.status = defs::TXD_STAT_DD;
-//         }
-//
-//         dev.rx_buffers = Some(rx_buffers);
-//
-//         Ok(dev)
-//     }
-//
-//     fn read(&self, offset: u32) -> u32 {
-//         unsafe {
-//             self.base
-//                 .offset(offset as isize)
-//                 .as_ptr::<u32>()
-//                 .read_volatile()
-//         }
-//     }
-//
-//     fn write(&self, offset: u32, value: u32) {
-//         unsafe {
-//             self.base
-//                 .offset(offset as isize)
-//                 .as_ptr::<u32>()
-//                 .write_volatile(value)
-//         }
-//     }
-//
-//     fn rx_desc_table<'lt>(&'lt self) -> &'lt mut [RxDescriptor; RX_DESC_SIZE] {
-//         self.rt_desc_page.as_cached().as_mut()
-//     }
-//
-//     fn tx_desc_table<'lt>(&'lt self) -> &'lt mut [TxDescriptor; TX_DESC_SIZE] {
-//         self.rt_desc_page.as_cached().offset(0x200).as_mut()
-//     }
-// }
-//
-// impl Drop for E1000eDev {
-//     fn drop(&mut self) {
-//         assert_eq!(self.status, netdev::LinkStatus::Down);
-//
-//         if let Some(_) = self.rx_buffers.take() {}
-//
-//         // TODO: we should wait until all packets are sent
-//         if let Some(_) = self.tx_buffers.take() {}
-//
-//         let _ = self.rt_desc_page;
-//     }
-// }
-//
-// impl pci::pci_device {
-//     fn header0(&self) -> &pci::device_header_type0 {
-//         unsafe { self.header_type0().as_ref() }.unwrap()
-//     }
-// }
-//
-// fn do_probe_device(dev: &mut pci::pci_device) -> Result<(), u32> {
-//     let bar0 = dev.header0().bars[0];
-//
-//     if bar0 & 0xf != 0 {
-//         return Err(EINVAL);
-//     }
-//
-//     unsafe { dev.enableBusMastering() };
-//
-//     let base = NoCachePP::new((bar0 & !0xf) as usize);
-//     let e1000e = E1000eDev::new(base)?;
-//
-//     netdev::register_netdev(e1000e)?;
-//
-//     Ok(())
-// }
-//
-// unsafe extern "C" fn probe_device(dev: *mut pci::pci_device) -> i32 {
-//     let dev = dev.as_mut().unwrap();
-//     match do_probe_device(dev) {
-//         Ok(_) => 0,
-//         Err(e) => -(e as i32),
-//     }
-// }
+use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO};
+use crate::kernel::interrupt::register_irq_handler;
+use crate::kernel::mem::paging::{self, AllocZeroed};
+use crate::kernel::mem::{AsMemoryBlock, PhysAccess};
+use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError};
+use crate::net::netdev;
+use crate::prelude::*;
+use alloc::boxed::Box;
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+use core::ptr::NonNull;
+use eonix_hal::fence::memory_barrier;
+use eonix_mm::address::{Addr, PAddr};
+use eonix_sync::SpinIrq;
+use paging::Page;
 
-pub fn register_e1000e_driver() {
-    // let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a];
+mod defs;
+
+/// One slot of the receive descriptor ring that lives at the start of
+/// `rt_desc_page`.
+///
+/// `#[repr(C)]` keeps the fields in declaration order; they add up to 16
+/// bytes, which is the element size `setup_rx` uses when programming
+/// `REG_RDLEN`.
+#[repr(C)]
+struct RxDescriptor {
+    // Physical address of the buffer backing this slot (filled in by `new`).
+    buffer: u64,
+    // Number of bytes in the slot; read by `fire` once `RXD_STAT_DD` is set.
+    length: u16,
+    // Not touched by this driver — presumably the hardware packet checksum.
+    checksum: u16,
+    // Status bits; `fire` tests `RXD_STAT_DD` here and clears the field after use.
+    status: u8,
+    // Not touched by this driver — presumably receive error flags.
+    errors: u8,
+    // Not touched by this driver — presumably the VLAN tag.
+    vlan: u16,
+}
+
+/// One slot of the transmit descriptor ring that lives 0x200 bytes into
+/// `rt_desc_page`.
+///
+/// `#[repr(C)]` keeps the fields in declaration order; they add up to 16
+/// bytes, which is the element size `setup_tx` uses when programming
+/// `REG_TDLEN`.
+#[repr(C)]
+struct TxDescriptor {
+    // Physical address of the frame to transmit (set by `send`).
+    buffer: u64,
+    // Frame length in bytes (set by `send`).
+    length: u16,
+    cso: u8, // Checksum offset
+    // Command bits; `send` sets `TXD_CMD_EOP | TXD_CMD_IFCS | TXD_CMD_RS`.
+    cmd: u8,
+    // Status bits; `TXD_STAT_DD` marks a slot that `send` may reuse.
+    status: u8,
+    css: u8, // Checksum start
+    // Not touched by this driver — presumably the VLAN tag.
+    vlan: u16,
+}
+
+const RX_DESC_SIZE: usize = 32;
+const TX_DESC_SIZE: usize = 32;
+
+/// Untyped pointer to the start of the device's memory-mapped register window.
+///
+/// All accesses go through the volatile helpers in `impl Registers`.
+struct Registers(NonNull<()>);
+
+// NOTE(review): `NonNull` is neither `Send` nor `Sync` by default, so these
+// impls are a promise made by the driver. They presumably rely on the device
+// being reached only through the netdev lock — TODO confirm.
+unsafe impl Send for Registers {}
+unsafe impl Sync for Registers {}
+
+/// Driver state for a single e1000e network controller.
+struct E1000eDev {
+    // Interrupt number handed to `register_irq_handler` in `up`.
+    irq_no: usize,
+
+    // MAC address read from register offset 0x5400 in `new`.
+    mac: netdev::Mac,
+    // `Down` until `up` completes successfully.
+    status: netdev::LinkStatus,
+    // `SpeedUnknown` until `up` decodes the speed bits of `REG_STAT`.
+    speed: netdev::LinkSpeed,
+    // Identifier obtained from `netdev::alloc_id`.
+    id: u32,
+
+    // Memory-mapped register window of the device.
+    regs: Registers,
+    // Single page holding both descriptor rings: RX at offset 0, TX at 0x200.
+    rt_desc_page: Page,
+    // Ring cursors; `None` until `setup_rx` / `setup_tx` have run.
+    rx_head: Option<u32>,
+    rx_tail: Option<u32>,
+    tx_tail: Option<u32>,
+
+    // Pages backing the RX descriptors, indexed like the RX ring.
+    rx_buffers: Option<Box<Vec<Page>>>,
+    // Storage reserved for pages backing the TX descriptors.
+    tx_buffers: Option<Box<Vec<Page>>>,
+}
+
+/// Returns `true` when every bit set in `bit` is also set in `val`.
+fn test(val: u32, bit: u32) -> bool {
+    // No requested bit may be missing from `val`.
+    bit & !val == 0
+}
+
+/// Debug adapter that renders a byte slice as a hex dump, 16 bytes per line.
+struct PrintableBytes<'a>(&'a [u8]);
+
+impl core::fmt::Debug for PrintableBytes<'_> {
+    /// Writes `PrintableBytes {`, then each byte as `0xNN ` with a newline
+    /// after every group of up to 16 bytes, then the closing `}`.
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "PrintableBytes {{")?;
+        for chunk in self.0.chunks(16) {
+            for &byte in chunk {
+                // The width counts the `0x` prefix, so 4 is needed for two
+                // zero-padded hex digits; a width of 2 never pads anything.
+                write!(f, "{byte:#04x} ")?;
+            }
+            writeln!(f)?;
+        }
+        write!(f, "}}")
+    }
+}
+
+impl Registers {
+    /// Wraps the register window that starts at physical address `base`.
+    fn new(base: PAddr) -> Self {
+        // NOTE(review): the pointer is only as valid as `base`; the caller is
+        // presumably expected to pass the device's register BAR — TODO confirm.
+        let window = unsafe { base.as_ptr() };
+        Self(window)
+    }
+
+    /// Volatile 32-bit read of the register at byte `offset`, followed by a
+    /// memory barrier.
+    fn read(&self, offset: u32) -> u32 {
+        self.read_as::<u32>(offset)
+    }
+
+    /// Volatile 32-bit write of `value` to the register at byte `offset`,
+    /// followed by a memory barrier.
+    fn write(&self, offset: u32, value: u32) {
+        unsafe {
+            // SAFETY: The offset is within the bounds of the device's memory-mapped registers.
+            let register = self.0.byte_offset(offset as isize).cast::<u32>();
+            register.write_volatile(value);
+        }
+        memory_barrier();
+    }
+
+    /// Volatile read of a `T` located at byte `offset`, followed by a memory
+    /// barrier.
+    fn read_as<T: Copy>(&self, offset: u32) -> T {
+        // SAFETY: The offset is within the bounds of the device's memory-mapped registers.
+        let value = unsafe {
+            self.0
+                .byte_offset(offset as isize)
+                .cast::<T>()
+                .read_volatile()
+        };
+        memory_barrier();
+        value
+    }
+}
+
+impl netdev::Netdev for E1000eDev {
+    /// MAC address read from the device when it was constructed.
+    fn mac(&self) -> netdev::Mac {
+        self.mac
+    }
+
+    /// Current link state as tracked by the driver.
+    fn link_status(&self) -> netdev::LinkStatus {
+        self.status
+    }
+
+    /// Link speed decoded by the last successful `up`.
+    fn link_speed(&self) -> netdev::LinkSpeed {
+        self.speed
+    }
+
+    /// Identifier allocated for this device by `netdev::alloc_id`.
+    fn id(&self) -> u32 {
+        self.id
+    }
+
+    /// Brings the interface up: checks the link, records the speed, clears
+    /// the multicast table and statistics, installs the IRQ handler, unmasks
+    /// interrupts and programs both descriptor rings.
+    ///
+    /// Returns `EIO` when the link is not up, `EINVAL` for an unknown speed
+    /// encoding or when the rings were already set up, and forwards errors
+    /// from `register_irq_handler`.
+    fn up(&mut self) -> Result<(), u32> {
+        let ctrl = self.regs.read(defs::REG_CTRL);
+        let status = self.regs.read(defs::REG_STAT);
+
+        // check link up
+        if !test(ctrl, defs::CTRL_SLU) || !test(status, defs::STAT_LU) {
+            return Err(EIO);
+        }
+
+        // auto negotiation of speed
+        match status & defs::STAT_SPEED_MASK {
+            defs::STAT_SPEED_10M => self.speed = netdev::LinkSpeed::Speed10M,
+            defs::STAT_SPEED_100M => self.speed = netdev::LinkSpeed::Speed100M,
+            defs::STAT_SPEED_1000M => self.speed = netdev::LinkSpeed::Speed1000M,
+            _ => return Err(EINVAL),
+        }
+
+        // clear multicast table
+        for i in (0..128).step_by(4) {
+            self.regs.write(defs::REG_MTA + i, 0);
+        }
+
+        self.clear_stats()?;
+
+        // setup interrupt handler
+        let device = netdev::get_netdev(self.id).unwrap();
+        register_irq_handler(self.irq_no as _, move || {
+            device.lock().fire().unwrap();
+        })?;
+
+        // enable interrupts
+        self.regs
+            .write(defs::REG_IMS, defs::ICR_NORMAL | defs::ICR_UP);
+
+        // read to clear any pending interrupts
+        self.regs.read(defs::REG_ICR);
+
+        self.setup_rx()?;
+        self.setup_tx()?;
+
+        self.status = netdev::LinkStatus::Up;
+
+        Ok(())
+    }
+
+    /// Interrupt path: drains every completed receive descriptor between the
+    /// software tail and the hardware head, logging each frame.
+    ///
+    /// Returns `Ok(())` immediately when `ICR_INT` is not set in the cause
+    /// register, and `EIO` when the ring has not been set up or a descriptor
+    /// behind the hardware head is not marked done.
+    fn fire(&mut self) -> Result<(), u32> {
+        let cause = self.regs.read(defs::REG_ICR);
+        if !test(cause, defs::ICR_INT) {
+            return Ok(());
+        }
+
+        loop {
+            let tail = self.rx_tail.ok_or(EIO)?;
+            let next_tail = (tail + 1) % RX_DESC_SIZE as u32;
+
+            // The slot at the hardware head has not been filled yet.
+            if next_tail == self.regs.read(defs::REG_RDH) {
+                break;
+            }
+
+            let ref mut desc = self.rx_desc_table()[next_tail as usize];
+            if !test(desc.status as u32, defs::RXD_STAT_DD as u32) {
+                Err(EIO)?;
+            }
+
+            desc.status = 0;
+            let len = desc.length as usize;
+
+            let buffers = self.rx_buffers.as_mut().ok_or(EIO)?;
+            let data = unsafe {
+                // SAFETY: No one could be writing to the buffer at this point.
+                &buffers[next_tail as usize].as_memblk().as_bytes()[..len]
+            };
+
+            println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data));
+            self.rx_tail = Some(next_tail);
+
+            // Hand the consumed slot back to the hardware. Without advancing
+            // the tail register the device never regains ownership of any
+            // descriptor and reception stalls once the ring has been used up.
+            self.regs.write(defs::REG_RDT, next_tail);
+        }
+
+        Ok(())
+    }
+
+    /// Queues `buf` for transmission as a single frame.
+    ///
+    /// Returns `EIO` when the TX ring is not set up or the slot at the tail
+    /// is not marked done, `EAGAIN` when the ring is full, and `EFAULT` when
+    /// `buf` does not fit into one page.
+    fn send(&mut self, buf: &[u8]) -> Result<(), u32> {
+        let tail = self.tx_tail.ok_or(EIO)?;
+        let head = self.regs.read(defs::REG_TDH);
+        let next_tail = (tail + 1) % TX_DESC_SIZE as u32;
+
+        if next_tail == head {
+            return Err(EAGAIN);
+        }
+
+        let slot = tail as usize;
+        if !test(
+            self.tx_desc_table()[slot].status as u32,
+            defs::TXD_STAT_DD as u32,
+        ) {
+            return Err(EIO);
+        }
+
+        let buffer_page = Page::alloc();
+        if buf.len() > buffer_page.len() {
+            return Err(EFAULT);
+        }
+
+        unsafe {
+            // SAFETY: We are the only one writing to this memory block.
+            buffer_page.as_memblk().as_bytes_mut()[..buf.len()].copy_from_slice(buf);
+        }
+
+        let buffer_addr = PAddr::from(buffer_page.pfn()).addr() as u64;
+
+        // Keep the page alive for as long as the descriptor refers to it.
+        // Dropping it when this function returns would leave the device
+        // reading the frame from memory that is no longer ours. Slots are
+        // used strictly in ring order, so the vector either grows by one or
+        // replaces the page of a descriptor the hardware already completed.
+        let buffers = self.tx_buffers.as_mut().ok_or(EIO)?;
+        if slot < buffers.len() {
+            buffers[slot] = buffer_page;
+        } else {
+            buffers.push(buffer_page);
+        }
+
+        let desc = &mut self.tx_desc_table()[slot];
+        desc.buffer = buffer_addr;
+        desc.length = buf.len() as u16;
+        desc.cmd = defs::TXD_CMD_EOP | defs::TXD_CMD_IFCS | defs::TXD_CMD_RS;
+        desc.status = 0;
+
+        self.tx_tail = Some(next_tail);
+        self.regs.write(defs::REG_TDT, next_tail);
+
+        // TODO: check if the packets are sent and update self.tx_head state
+
+        Ok(())
+    }
+}
 
-    // for id in dev_ids.into_iter() {
-    //     let ret = unsafe { pci::register_driver_r(0x8086, id, Some(probe_device)) };
+impl E1000eDev {
+    /// Programs the receive ring registers and enables the receiver.
+    ///
+    /// Returns `EINVAL` when the ring cursors are already initialised.
+    fn setup_rx(&mut self) -> Result<(), u32> {
+        if !self.rx_head.is_none() || !self.rx_tail.is_none() {
+            return Err(EINVAL);
+        }
+
+        // The RX ring sits at the very start of the descriptor page.
+        let addr = PAddr::from(self.rt_desc_page.pfn()).addr();
+
+        // Ring base address, split into low and high 32-bit halves.
+        self.regs.write(defs::REG_RDBAL, addr as u32);
+        self.regs.write(defs::REG_RDBAH, (addr >> 32) as u32);
+
+        // Ring length in bytes.
+        self.regs.write(
+            defs::REG_RDLEN,
+            (RX_DESC_SIZE * size_of::<RxDescriptor>()) as u32,
+        );
+
+        // Head starts at slot 0, tail at the last slot.
+        self.regs.write(defs::REG_RDH, 0);
+        self.regs.write(defs::REG_RDT, RX_DESC_SIZE as u32 - 1);
+
+        // Mirror the register values in the software cursors.
+        self.rx_head = Some(0);
+        self.rx_tail = Some(RX_DESC_SIZE as u32 - 1);
+
+        // Written last, after the ring registers above are in place.
+        self.regs.write(
+            defs::REG_RCTL,
+            defs::RCTL_EN
+                | defs::RCTL_MPE
+                | defs::RCTL_LPE
+                | defs::RCTL_LBM_NO
+                | defs::RCTL_DTYP_LEGACY
+                | defs::RCTL_BAM
+                | defs::RCTL_BSIZE_8192
+                | defs::RCTL_SECRC,
+        );
+
+        Ok(())
+    }
+
+    /// Programs the transmit ring registers and enables the transmitter.
+    ///
+    /// Returns `EINVAL` when the tail cursor is already initialised.
+    fn setup_tx(&mut self) -> Result<(), u32> {
+        if !self.tx_tail.is_none() {
+            return Err(EINVAL);
+        }
+
+        // The TX ring follows the RX ring inside the same page; 0x200 is the
+        // same offset `tx_desc_table` splits the page at.
+        let addr = PAddr::from(self.rt_desc_page.pfn()).addr() + 0x200;
+
+        // Ring base address, split into low and high 32-bit halves.
+        self.regs.write(defs::REG_TDBAL, addr as u32);
+        self.regs.write(defs::REG_TDBAH, (addr >> 32) as u32);
+
+        // Ring length in bytes.
+        self.regs.write(
+            defs::REG_TDLEN,
+            (TX_DESC_SIZE * size_of::<TxDescriptor>()) as u32,
+        );
+
+        // Head and tail both start at slot 0.
+        self.regs.write(defs::REG_TDH, 0);
+        self.regs.write(defs::REG_TDT, 0);
+
+        self.tx_tail = Some(0);
+
+        // Written last, after the ring registers above are in place.
+        // NOTE(review): 15 and 64 are presumably the collision threshold and
+        // collision distance — TODO confirm against the datasheet.
+        self.regs.write(
+            defs::REG_TCTL,
+            defs::TCTL_EN
+                | defs::TCTL_PSP
+                | (15 << defs::TCTL_CT_SHIFT)
+                | (64 << defs::TCTL_COLD_SHIFT)
+                | defs::TCTL_RTLC,
+        );
+
+        Ok(())
+    }
+
+    /// Resets the controller and leaves all of its interrupts masked.
+    ///
+    /// Both waits are unbounded busy loops; the function only ever returns
+    /// `Ok(())`.
+    fn reset(&self) -> Result<(), u32> {
+        // disable interrupts so we won't mess things up
+        self.regs.write(defs::REG_IMC, 0xffffffff);
+
+        let ctrl = self.regs.read(defs::REG_CTRL);
+        self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_GIOD);
+
+        while self.regs.read(defs::REG_STAT) & defs::STAT_GIOE != 0 {
+            // spin until STAT_GIOE clears after CTRL_GIOD was set above
+            // (presumably outstanding bus-master requests draining — TODO confirm)
+        }
+
+        let ctrl = self.regs.read(defs::REG_CTRL);
+        self.regs.write(defs::REG_CTRL, ctrl | defs::CTRL_RST);
+
+        while self.regs.read(defs::REG_CTRL) & defs::CTRL_RST != 0 {
+            // wait for reset
+        }
+
+        // disable interrupts again
+        self.regs.write(defs::REG_IMC, 0xffffffff);
+
+        Ok(())
+    }
+
+    /// Writes zero to the statistics registers listed below.
+    fn clear_stats(&self) -> Result<(), u32> {
+        self.regs.write(defs::REG_COLC, 0);
+        self.regs.write(defs::REG_GPRC, 0);
+        self.regs.write(defs::REG_MPRC, 0);
+        self.regs.write(defs::REG_GPTC, 0);
+        self.regs.write(defs::REG_GORCL, 0);
+        self.regs.write(defs::REG_GORCH, 0);
+        self.regs.write(defs::REG_GOTCL, 0);
+        self.regs.write(defs::REG_GOTCH, 0);
+        Ok(())
+    }
+
+    /// Creates the driver state for the device whose registers start at
+    /// `base`, resets the hardware, reads its MAC address and prepares both
+    /// descriptor tables. The link stays `Down` until `up` is called.
+    pub fn new(base: PAddr, irq_no: usize) -> Result<Self, u32> {
+        // Zeroed page that holds both descriptor rings.
+        let page = Page::zeroed();
+
+        let mut dev = Self {
+            irq_no,
+            mac: [0; 6],
+            status: netdev::LinkStatus::Down,
+            speed: netdev::LinkSpeed::SpeedUnknown,
+            id: netdev::alloc_id(),
+            regs: Registers::new(base),
+            rt_desc_page: page,
+            rx_head: None,
+            rx_tail: None,
+            tx_tail: None,
+            rx_buffers: None,
+            tx_buffers: None,
+        };
+
+        dev.reset()?;
+
+        // The MAC is read as six bytes from register offset 0x5400.
+        dev.mac = dev.regs.read_as(0x5400);
+        dev.tx_buffers = Some(Box::new(Vec::with_capacity(TX_DESC_SIZE)));
+
+        let mut rx_buffers = Box::new(Vec::with_capacity(RX_DESC_SIZE));
+
+        // Give every RX descriptor its own buffer and keep the pages in
+        // `rx_buffers`, indexed like the ring.
+        for index in 0..RX_DESC_SIZE {
+            // NOTE(review): presumably an order-2 (four page) allocation — TODO confirm.
+            let page = Page::alloc_order(2);
+
+            let ref mut desc = dev.rx_desc_table()[index];
+            desc.buffer = PAddr::from(page.pfn()).addr() as u64;
+            desc.status = 0;
+
+            rx_buffers.push(page);
+        }
+
+        // Mark every TX slot as done so that `send` accepts it.
+        for index in 0..TX_DESC_SIZE {
+            let ref mut desc = dev.tx_desc_table()[index];
+            desc.status = defs::TXD_STAT_DD;
+        }
+
+        dev.rx_buffers = Some(rx_buffers);
+
+        Ok(dev)
+    }
+
+    /// Views the start of the descriptor page as the RX descriptor table.
+    ///
+    /// NOTE(review): this hands out `&mut` from `&self`, so callers must not
+    /// keep two overlapping references alive.
+    fn rx_desc_table(&self) -> &mut [RxDescriptor; RX_DESC_SIZE] {
+        unsafe {
+            // SAFETY: TODO
+            self.rt_desc_page.as_memblk().as_ptr().as_mut()
+        }
+    }
+
+    /// Views the descriptor page from offset 0x200 onwards as the TX
+    /// descriptor table.
+    ///
+    /// NOTE(review): this hands out `&mut` from `&self`, so callers must not
+    /// keep two overlapping references alive.
+    fn tx_desc_table(&self) -> &mut [TxDescriptor; TX_DESC_SIZE] {
+        let (_, right) = self.rt_desc_page.as_memblk().split_at(0x200);
+        unsafe {
+            // SAFETY: TODO
+            right.as_ptr().as_mut()
+        }
+    }
+}
+
+impl Drop for E1000eDev {
+    /// Releases the buffer pages. The device must already be marked `Down`;
+    /// dropping it in any other state trips the assertion.
+    fn drop(&mut self) {
+        assert_eq!(self.status, netdev::LinkStatus::Down);
+
+        // Overwriting the option drops whatever buffers it still held.
+        self.rx_buffers = None;
+
+        // TODO: we should wait until all packets are sent
+        self.tx_buffers = None;
+
+        let _ = self.rt_desc_page;
+    }
+}
+
+/// PCI driver entry for one e1000e device ID; one instance is registered per
+/// supported ID.
+struct Driver {
+    // PCI device ID this instance reports from `device_id`.
+    dev_id: u16,
+}
+
+impl PCIDriver for Driver {
+    /// Vendor ID this driver reports (0x8086).
+    fn vendor_id(&self) -> u16 {
+        0x8086
+    }
+
+    /// Device ID this instance was created for.
+    fn device_id(&self) -> u16 {
+        self.dev_id
+    }
+
+    /// Sets up a PCI device handed to this driver: validates BAR0, enables
+    /// bus mastering, constructs the `E1000eDev`, registers it as a network
+    /// device and brings it up.
+    ///
+    /// Fails with `EINVAL` when the header is not an endpoint header or when
+    /// any of the low four bits of BAR0 is set; errors from device
+    /// construction, registration and `up` are propagated.
+    fn handle_device(&self, device: Arc<PCIDevice<'static>>) -> Result<(), PciError> {
+        let Header::Endpoint(header) = device.header else {
+            Err(EINVAL)?
+        };
+
+        let bar0 = header.bars[0];
+
+        // NOTE(review): presumably rejects IO-space, 64-bit and prefetchable
+        // BARs, whose flags live in the low four bits — TODO confirm.
+        if bar0 & 0xf != 0 {
+            Err(EINVAL)?;
+        }
+
+        device.enable_bus_mastering();
+
+        // The low bits were checked to be zero, so BAR0 is used as the base as-is.
+        let base = PAddr::from(bar0 as usize);
+        let e1000e = E1000eDev::new(base, header.interrupt_line as usize)?;
+
+        let dev = netdev::register_netdev(e1000e)?;
+        dev.lock_irq().up()?;
+
+        Ok(())
+    }
+}
+
+pub fn register_e1000e_driver() {
+    let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a];
 
-    //     assert_eq!(ret, 0);
-    // }
+    for id in dev_ids.into_iter() {
+        pcie::register_driver(Driver { dev_id: id }).unwrap();
+    }
 }

+ 1 - 8
src/driver/sbi_console.rs

@@ -29,12 +29,5 @@ pub fn init_console() {
 
     let console = Arc::new(SbiConsole);
     let terminal = Terminal::new(console.clone());
-    crate::kernel::console::set_console(terminal.clone()).expect("Failed to set console");
-
-    CharDevice::register(
-        make_device(4, 64),
-        Arc::from("sbi_console"),
-        CharDeviceType::Terminal(terminal),
-    )
-    .expect("Failed to register SBI console as a character device");
+    crate::kernel::console::set_console(terminal).expect("Failed to set console");
 }

+ 81 - 65
src/driver/serial.rs

@@ -1,3 +1,5 @@
+mod io;
+
 use crate::{
     kernel::{
         block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler,
@@ -8,12 +10,10 @@ use crate::{
 use alloc::{collections::vec_deque::VecDeque, format, sync::Arc};
 use bitflags::bitflags;
 use core::pin::pin;
-use eonix_hal::arch_exported::io::Port8;
+use eonix_mm::address::PAddr;
 use eonix_runtime::{run::FutureRun, scheduler::Scheduler};
 use eonix_sync::{SpinIrq as _, WaitList};
-
-#[cfg(not(target_arch = "x86_64"))]
-compile_error!("Serial driver is only supported on x86_64 architecture");
+use io::SerialIO;
 
 bitflags! {
     struct LineStatus: u8 {
@@ -22,6 +22,11 @@ bitflags! {
     }
 }
 
+trait SerialRegister {
+    fn read(&self) -> u8;
+    fn write(&self, value: u8);
+}
+
 #[allow(dead_code)]
 struct Serial {
     id: u32,
@@ -33,35 +38,22 @@ struct Serial {
     working: Spin<bool>,
     tx_buffer: Spin<VecDeque<u8>>,
 
-    tx_rx: Port8,
-    int_ena: Port8,
-    int_ident: Port8,
-    line_control: Port8,
-    modem_control: Port8,
-    line_status: Port8,
-    modem_status: Port8,
-    scratch: Port8,
+    ioregs: SerialIO,
 }
 
 impl Serial {
-    const COM0_BASE: u16 = 0x3f8;
-    const COM1_BASE: u16 = 0x2f8;
-
-    const COM0_IRQ: u8 = 4;
-    const COM1_IRQ: u8 = 3;
-
     fn enable_interrupts(&self) {
         // Enable interrupt #0: Received data available
-        self.int_ena.write(0x03);
+        self.ioregs.int_ena().write(0x03);
     }
 
     fn disable_interrupts(&self) {
         // Disable interrupt #0: Received data available
-        self.int_ena.write(0x02);
+        self.ioregs.int_ena().write(0x02);
     }
 
     fn line_status(&self) -> LineStatus {
-        LineStatus::from_bits_truncate(self.line_status.read())
+        LineStatus::from_bits_truncate(self.ioregs.line_status().read())
     }
 
     async fn wait_for_interrupt(&self) {
@@ -85,7 +77,7 @@ impl Serial {
 
         loop {
             while port.line_status().contains(LineStatus::RX_READY) {
-                let ch = port.tx_rx.read();
+                let ch = port.ioregs.tx_rx().read();
 
                 if let Some(terminal) = terminal.as_ref() {
                     terminal.commit_char(ch).await;
@@ -99,7 +91,7 @@ impl Serial {
                 // Give it a chance to receive data.
                 for &ch in tx_buffer.iter().take(64) {
                     if port.line_status().contains(LineStatus::TX_READY) {
-                        port.tx_rx.write(ch);
+                        port.ioregs.tx_rx().write(ch);
                     } else {
                         break;
                     }
@@ -116,40 +108,32 @@ impl Serial {
         }
     }
 
-    pub fn new(id: u32, base_port: u16) -> KResult<Self> {
-        let port = Self {
+    pub fn new(id: u32, ioregs: SerialIO) -> KResult<Self> {
+        ioregs.int_ena().write(0x00); // Disable all interrupts
+        ioregs.line_control().write(0x80); // Enable DLAB (set baud rate divisor)
+        ioregs.tx_rx().write(0x00); // Set divisor to 0 (lo byte) 115200 baud rate
+        ioregs.int_ena().write(0x00); //              0 (hi byte)
+        ioregs.line_control().write(0x03); // 8 bits, no parity, one stop bit
+        ioregs.int_ident().write(0xc7); // Enable FIFO, clear them, with 14-byte threshold
+        ioregs.modem_control().write(0x0b); // IRQs enabled, RTS/DSR set
+        ioregs.modem_control().write(0x1e); // Set in loopback mode, test the serial chip
+        ioregs.tx_rx().write(0x19); // Test serial chip (send byte 0x19 and check if serial returns
+                                    // same byte)
+        if ioregs.tx_rx().read() != 0x19 {
+            return Err(EIO);
+        }
+
+        ioregs.modem_control().write(0x0f); // Return to normal operation mode
+
+        Ok(Self {
             id,
             name: Arc::from(format!("ttyS{id}")),
             terminal: Spin::new(None),
             worker_wait: WaitList::new(),
             working: Spin::new(true),
             tx_buffer: Spin::new(VecDeque::new()),
-            tx_rx: Port8::new(base_port),
-            int_ena: Port8::new(base_port + 1),
-            int_ident: Port8::new(base_port + 2),
-            line_control: Port8::new(base_port + 3),
-            modem_control: Port8::new(base_port + 4),
-            line_status: Port8::new(base_port + 5),
-            modem_status: Port8::new(base_port + 6),
-            scratch: Port8::new(base_port + 7),
-        };
-
-        port.int_ena.write(0x00); // Disable all interrupts
-        port.line_control.write(0x80); // Enable DLAB (set baud rate divisor)
-        port.tx_rx.write(0x00); // Set divisor to 0 (lo byte) 115200 baud rate
-        port.int_ena.write(0x00); //              0 (hi byte)
-        port.line_control.write(0x03); // 8 bits, no parity, one stop bit
-        port.int_ident.write(0xc7); // Enable FIFO, clear them, with 14-byte threshold
-        port.modem_control.write(0x0b); // IRQs enabled, RTS/DSR set
-        port.modem_control.write(0x1e); // Set in loopback mode, test the serial chip
-        port.tx_rx.write(0x19); // Test serial chip (send byte 0x19 and check if serial returns
-                                // same byte)
-        if port.tx_rx.read() != 0x19 {
-            return Err(EIO);
-        }
-
-        port.modem_control.write(0x0f); // Return to normal operation mode
-        Ok(port)
+            ioregs,
+        })
     }
 
     fn wakeup_worker(&self) {
@@ -161,24 +145,18 @@ impl Serial {
 
     fn irq_handler(&self) {
         // Read the interrupt ID register to clear the interrupt.
-        self.int_ident.read();
+        self.ioregs.int_ident().read();
         self.wakeup_worker();
     }
 
-    fn register_char_device(port: Self) -> KResult<()> {
-        let port = Arc::new(port);
+    fn register_as_char_device(self, irq_no: usize) -> KResult<()> {
+        let port = Arc::new(self);
         let terminal = Terminal::new(port.clone());
 
         port.terminal.lock().replace(terminal.clone());
 
         {
             let port = port.clone();
-            let irq_no = match port.id {
-                0 => Serial::COM0_IRQ,
-                1 => Serial::COM1_IRQ,
-                _ => unreachable!(),
-            };
-
             register_irq_handler(irq_no as i32, move || {
                 port.irq_handler();
             })?;
@@ -208,18 +186,56 @@ impl TerminalDevice for Serial {
 
     fn write_direct(&self, data: &[u8]) {
         for &ch in data {
-            self.tx_rx.write(ch);
+            self.ioregs.tx_rx().write(ch);
         }
     }
 }
 
 pub fn init() -> KResult<()> {
-    if let Ok(port) = Serial::new(0, Serial::COM0_BASE) {
-        Serial::register_char_device(port)?;
+    #[cfg(target_arch = "x86_64")]
+    {
+        let (com0, com1) = unsafe {
+            const COM0_BASE: u16 = 0x3f8;
+            const COM1_BASE: u16 = 0x2f8;
+            // SAFETY: The COM ports are well-known hardware addresses.
+            (SerialIO::new(COM0_BASE), SerialIO::new(COM1_BASE))
+        };
+
+        if let Ok(port) = Serial::new(0, com0) {
+            const COM0_IRQ: usize = 4;
+            port.register_as_char_device(COM0_IRQ)?;
+        }
+
+        if let Ok(port) = Serial::new(1, com1) {
+            const COM1_IRQ: usize = 3;
+            port.register_as_char_device(COM1_IRQ)?;
+        }
     }
 
-    if let Ok(port) = Serial::new(1, Serial::COM1_BASE) {
-        Serial::register_char_device(port)?;
+    #[cfg(target_arch = "riscv64")]
+    {
+        use eonix_hal::arch_exported::fdt::FDT;
+
+        if let Some(uart) = FDT.find_compatible(&["ns16550a", "ns16550"]) {
+            let regs = uart.reg().unwrap();
+            let base_address = regs
+                .map(|reg| PAddr::from(reg.starting_address as usize))
+                .next()
+                .expect("UART base address not found");
+
+            let port = unsafe {
+                // SAFETY: The base address is provided by the FDT and should be valid.
+                SerialIO::new(base_address)
+            };
+
+            let serial = Serial::new(0, port)?;
+            serial.register_as_char_device(
+                uart.interrupts()
+                    .expect("UART device should have `interrupts` property")
+                    .next()
+                    .expect("UART device should have an interrupt pin"),
+            )?;
+        }
     }
 
     Ok(())

+ 157 - 0
src/driver/serial/io.rs

@@ -0,0 +1,157 @@
+use super::SerialRegister;
+use core::ptr::NonNull;
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_mm::address::{PAddr, PhysAccess};
+
+#[cfg(target_arch = "x86_64")]
+use eonix_hal::arch_exported::io::Port8;
+
+/// Port-mapped register set of one serial port on x86_64.
+///
+/// Each field is the 8-bit port at `base + n`, in declaration order starting
+/// with `n = 0`, as set up by `SerialIO::new`.
+#[cfg(target_arch = "x86_64")]
+pub struct SerialIO {
+    tx_rx: Port8,
+    int_ena: Port8,
+    int_ident: Port8,
+    line_control: Port8,
+    modem_control: Port8,
+    line_status: Port8,
+    modem_status: Port8,
+    scratch: Port8,
+}
+
+/// Lets an x86 IO port be used wherever a `SerialRegister` is expected.
+// NOTE(review): both bodies look self-recursive; they presumably resolve to
+// inherent `Port8::read` / `Port8::write` methods, which take precedence over
+// the trait methods — confirm that `Port8` actually provides them.
+#[cfg(target_arch = "x86_64")]
+impl SerialRegister for Port8 {
+    fn read(&self) -> u8 {
+        self.read()
+    }
+
+    fn write(&self, data: u8) {
+        self.write(data);
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+impl SerialIO {
+    /// Creates a new `SerialIO` instance for the serial port whose IO port
+    /// range starts at `base`.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the provided `base` is a valid IO port
+    /// base for the serial port. The caller must ensure that this port base is correct.
+    pub unsafe fn new(base: u16) -> Self {
+        Self {
+            tx_rx: Port8::new(base),
+            int_ena: Port8::new(base + 1),
+            int_ident: Port8::new(base + 2),
+            line_control: Port8::new(base + 3),
+            modem_control: Port8::new(base + 4),
+            line_status: Port8::new(base + 5),
+            modem_status: Port8::new(base + 6),
+            scratch: Port8::new(base + 7),
+        }
+    }
+
+    /// Register at `base + 0`.
+    pub fn tx_rx(&self) -> impl SerialRegister {
+        self.tx_rx
+    }
+
+    /// Register at `base + 1`.
+    pub fn int_ena(&self) -> impl SerialRegister {
+        self.int_ena
+    }
+
+    /// Register at `base + 2`.
+    pub fn int_ident(&self) -> impl SerialRegister {
+        self.int_ident
+    }
+
+    /// Register at `base + 3`.
+    pub fn line_control(&self) -> impl SerialRegister {
+        self.line_control
+    }
+
+    /// Register at `base + 4`.
+    pub fn modem_control(&self) -> impl SerialRegister {
+        self.modem_control
+    }
+
+    /// Register at `base + 5`.
+    pub fn line_status(&self) -> impl SerialRegister {
+        self.line_status
+    }
+
+    /// Register at `base + 6`.
+    pub fn modem_status(&self) -> impl SerialRegister {
+        self.modem_status
+    }
+
+    /// Register at `base + 7`.
+    pub fn scratch(&self) -> impl SerialRegister {
+        self.scratch
+    }
+}
+
+/// Memory-mapped register set of one serial port on riscv64.
+///
+/// The individual registers are the bytes at `base_addr + 0..=7`.
+#[cfg(target_arch = "riscv64")]
+pub struct SerialIO {
+    // Pointer to the first register, obtained from a physical address in `new`.
+    base_addr: NonNull<u8>,
+}
+
+// NOTE(review): `NonNull` is neither `Send` nor `Sync` by default, so these
+// impls are a promise made by the driver; the justification is not visible
+// here — TODO document why sharing the register pointer is sound.
+#[cfg(target_arch = "riscv64")]
+unsafe impl Send for SerialIO {}
+
+#[cfg(target_arch = "riscv64")]
+unsafe impl Sync for SerialIO {}
+
+/// Treats a byte pointer as a memory-mapped serial register that is accessed
+/// with volatile loads and stores.
+#[cfg(target_arch = "riscv64")]
+impl SerialRegister for NonNull<u8> {
+    fn read(&self) -> u8 {
+        let register = self.as_ptr();
+        // SAFETY: `self` is a valid pointer to the serial port register.
+        unsafe { core::ptr::read_volatile(register) }
+    }
+
+    fn write(&self, data: u8) {
+        let register = self.as_ptr();
+        // SAFETY: `self` is a valid pointer to the serial port register.
+        unsafe { core::ptr::write_volatile(register, data) }
+    }
+}
+
+#[cfg(target_arch = "riscv64")]
+impl SerialIO {
+    /// Creates a new `SerialIO` instance with the given physical address.
+    ///
+    /// # Safety
+    /// This function is unsafe because it assumes that the provided `base_addr` is a valid
+    /// physical address for the serial port. The caller must ensure that this address is correct
+    /// and that the memory at this address is accessible.
+    pub unsafe fn new(base_addr: PAddr) -> Self {
+        // SAFETY: `base_addr` is a valid physical address for the serial port.
+        let base_addr: NonNull<u8> = unsafe { ArchPhysAccess::as_ptr(base_addr) };
+
+        Self { base_addr }
+    }
+
+    /// Pointer to the register `index` bytes past the base address.
+    fn register_at(&self, index: usize) -> NonNull<u8> {
+        // NOTE(review): relies on the contract of `new` that the register
+        // window at `base_addr` is valid; every caller passes an index of at most 7.
+        unsafe { self.base_addr.add(index) }
+    }
+
+    /// Register at offset 0.
+    pub fn tx_rx(&self) -> impl SerialRegister {
+        self.base_addr
+    }
+
+    /// Register at offset 1.
+    pub fn int_ena(&self) -> impl SerialRegister {
+        self.register_at(1)
+    }
+
+    /// Register at offset 2.
+    pub fn int_ident(&self) -> impl SerialRegister {
+        self.register_at(2)
+    }
+
+    /// Register at offset 3.
+    pub fn line_control(&self) -> impl SerialRegister {
+        self.register_at(3)
+    }
+
+    /// Register at offset 4.
+    pub fn modem_control(&self) -> impl SerialRegister {
+        self.register_at(4)
+    }
+
+    /// Register at offset 5.
+    pub fn line_status(&self) -> impl SerialRegister {
+        self.register_at(5)
+    }
+
+    /// Register at offset 6.
+    pub fn modem_status(&self) -> impl SerialRegister {
+        self.register_at(6)
+    }
+
+    /// Register at offset 7.
+    pub fn scratch(&self) -> impl SerialRegister {
+        self.register_at(7)
+    }
+}

+ 2 - 2
src/driver/virtio.rs

@@ -15,6 +15,7 @@ use eonix_mm::{
     address::{Addr, PAddr, PhysAccess},
     paging::PFN,
 };
+use eonix_runtime::task::Task;
 use eonix_sync::Spin;
 use virtio_drivers::{
     device::blk::VirtIOBlk,
@@ -113,8 +114,7 @@ pub fn init_virtio_devices() {
                     )
                     .expect("Failed to register VirtIO Block device");
 
-                    block_device
-                        .partprobe()
+                    Task::block_on(block_device.partprobe())
                         .expect("Failed to probe partitions for VirtIO Block device");
 
                     disk_id += 1;

+ 18 - 9
src/driver/virtio/virtio_blk.rs

@@ -17,16 +17,25 @@ impl BlockRequestQueue for Spin<VirtIOBlk<HAL, MmioTransport<'_>>> {
     }
 
     fn submit(&self, req: BlockDeviceRequest) -> KResult<()> {
-        let mut dev = self.lock();
-        for ((start, len), buffer_page) in
-            Chunks::new(req.sector as usize, req.count as usize, 8).zip(req.buffer.iter())
-        {
-            let buffer = unsafe {
-                // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us.
-                &mut buffer_page.as_memblk().as_bytes_mut()[..len as usize * 512]
-            };
+        match req {
+            BlockDeviceRequest::Write { .. } => todo!(),
+            BlockDeviceRequest::Read {
+                sector,
+                count,
+                buffer,
+            } => {
+                let mut dev = self.lock();
+                for ((start, len), buffer_page) in
+                    Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter())
+                {
+                    let buffer = unsafe {
+                        // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us.
+                        &mut buffer_page.as_memblk().as_bytes_mut()[..len as usize * 512]
+                    };
 
-            dev.read_blocks(start, buffer).map_err(|_| EIO)?;
+                    dev.read_blocks(start, buffer).map_err(|_| EIO)?;
+                }
+            }
         }
 
         Ok(())

+ 2 - 2
src/fs/fat32.rs

@@ -101,12 +101,12 @@ impl FatFs {
     fn read_cluster(&self, cluster: ClusterNo, buf: &Page) -> KResult<()> {
         let cluster = cluster - 2;
 
-        let rq = BlockDeviceRequest {
+        let rq = BlockDeviceRequest::Read {
             sector: self.data_start as u64 + cluster as u64 * self.sectors_per_cluster as u64,
             count: self.sectors_per_cluster as u64,
             buffer: core::slice::from_ref(buf),
         };
-        self.device.read_raw(rq)?;
+        self.device.commit_request(rq)?;
 
         Ok(())
     }

+ 2 - 2
src/fs/procfs.rs

@@ -119,8 +119,8 @@ impl DirInode {
     pub fn new(ino: Ino, vfs: Weak<ProcFs>) -> Arc<Self> {
         Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
             addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem));
-            addr_of_mut_field!(inode, mode).write((S_IFDIR | 0o755).into());
-            addr_of_mut_field!(inode, nlink).write(1.into());
+            addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | 0o755).into());
+            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
         })
     }
 }

+ 9 - 9
src/fs/tmpfs.rs

@@ -40,8 +40,8 @@ impl NodeInode {
         Self::new_locked(ino, vfs, |inode, _| unsafe {
             addr_of_mut_field!(inode, devid).write(devid);
 
-            addr_of_mut_field!(inode, mode).write(mode.into());
-            addr_of_mut_field!(inode, nlink).write(1.into());
+            addr_of_mut_field!(&mut *inode, mode).write(mode.into());
+            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
         })
     }
 }
@@ -64,9 +64,9 @@ impl DirectoryInode {
             addr_of_mut_field!(inode, entries)
                 .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem));
 
-            addr_of_mut_field!(inode, size).write(1.into());
-            addr_of_mut_field!(inode, mode).write((S_IFDIR | (mode & 0o777)).into());
-            addr_of_mut_field!(inode, nlink).write(1.into()); // link from `.` to itself
+            addr_of_mut_field!(&mut *inode, size).write(1.into());
+            addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | (mode & 0o777)).into());
+            addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself
         })
     }
 
@@ -229,8 +229,8 @@ impl SymlinkInode {
             let len = target.len();
             addr_of_mut_field!(inode, target).write(target);
 
-            addr_of_mut_field!(inode, mode).write((S_IFLNK | 0o777).into());
-            addr_of_mut_field!(inode, size).write((len as u64).into());
+            addr_of_mut_field!(&mut *inode, mode).write((S_IFLNK | 0o777).into());
+            addr_of_mut_field!(&mut *inode, size).write((len as u64).into());
         })
     }
 }
@@ -258,8 +258,8 @@ impl FileInode {
         Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
             addr_of_mut_field!(inode, filedata).write(Locked::new(vec![], rwsem));
 
-            addr_of_mut_field!(inode, mode).write((S_IFREG | (mode & 0o777)).into());
-            addr_of_mut_field!(inode, nlink).write(1.into());
+            addr_of_mut_field!(&mut *inode, mode).write((S_IFREG | (mode & 0o777)).into());
+            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
         })
     }
 }

+ 98 - 89
src/kernel/block.rs

@@ -1,11 +1,13 @@
+mod mbr;
+
 use super::{
     constants::ENOENT,
     mem::{paging::Page, AsMemoryBlock as _},
     vfs::DevId,
 };
-use crate::kernel::constants::{EEXIST, EINVAL, EIO};
+use crate::kernel::constants::{EEXIST, EINVAL};
 use crate::{
-    io::{Buffer, FillResult, UninitBuffer},
+    io::{Buffer, FillResult},
     prelude::*,
 };
 use alloc::{
@@ -13,40 +15,44 @@ use alloc::{
     sync::Arc,
 };
 use core::cmp::Ordering;
+use mbr::MBRPartTable;
 
 pub fn make_device(major: u32, minor: u32) -> DevId {
     (major << 8) & 0xff00u32 | minor & 0xffu32
 }
 
-pub trait BlockRequestQueue: Send + Sync {
-    /// Maximum number of sectors that can be read in one request
-    ///
-    fn max_request_pages(&self) -> u64;
-
-    fn submit(&self, req: BlockDeviceRequest) -> KResult<()>;
+pub struct Partition {
+    pub lba_offset: u64,
+    pub sector_count: u64,
 }
 
-struct BlockDeviceDisk {
-    queue: Arc<dyn BlockRequestQueue>,
+pub trait PartTable {
+    fn partitions(&self) -> impl Iterator<Item = Partition> + use<'_, Self>;
 }
 
-#[allow(dead_code)]
-struct BlockDevicePartition {
-    disk_dev: DevId,
-    offset: u64,
+pub trait BlockRequestQueue: Send + Sync {
+    /// Maximum number of sectors that can be read in one request
+    fn max_request_pages(&self) -> u64;
 
-    queue: Arc<dyn BlockRequestQueue>,
+    fn submit(&self, req: BlockDeviceRequest) -> KResult<()>;
 }
 
 enum BlockDeviceType {
-    Disk(BlockDeviceDisk),
-    Partition(BlockDevicePartition),
+    Disk {
+        queue: Arc<dyn BlockRequestQueue>,
+    },
+    Partition {
+        disk_dev: DevId,
+        lba_offset: u64,
+        queue: Arc<dyn BlockRequestQueue>,
+    },
 }
 
 pub struct BlockDevice {
+    /// Unique device identifier, major and minor numbers
     devid: DevId,
-    size: u64,
-    max_pages: u64,
+    /// Total size of the device in sectors (512 bytes each)
+    sector_count: u64,
 
     dev_type: BlockDeviceType,
 }
@@ -73,37 +79,16 @@ impl Ord for BlockDevice {
 
 static BLOCK_DEVICE_LIST: Spin<BTreeMap<DevId, Arc<BlockDevice>>> = Spin::new(BTreeMap::new());
 
-#[derive(Debug, Clone, Copy)]
-#[repr(C)]
-struct MBREntry {
-    attr: u8,
-    chs_start: [u8; 3],
-    part_type: u8,
-    chs_end: [u8; 3],
-    lba_start: u32,
-    cnt: u32,
-}
-
-#[derive(Debug, Clone, Copy)]
-#[repr(C, packed)]
-struct MBR {
-    code: [u8; 446],
-    entries: [MBREntry; 4],
-    magic: [u8; 2],
-}
-
 impl BlockDevice {
     pub fn register_disk(
         devid: DevId,
         size: u64,
         queue: Arc<dyn BlockRequestQueue>,
     ) -> KResult<Arc<Self>> {
-        let max_pages = queue.max_request_pages();
         let device = Arc::new(Self {
             devid,
-            size,
-            max_pages,
-            dev_type: BlockDeviceType::Disk(BlockDeviceDisk { queue }),
+            sector_count: size,
+            dev_type: BlockDeviceType::Disk { queue },
         });
 
         match BLOCK_DEVICE_LIST.lock().entry(devid) {
@@ -122,21 +107,27 @@ impl BlockDevice {
         self.devid
     }
 
+    fn queue(&self) -> &Arc<dyn BlockRequestQueue> {
+        match &self.dev_type {
+            BlockDeviceType::Disk { queue } => queue,
+            BlockDeviceType::Partition { queue, .. } => queue,
+        }
+    }
+
     pub fn register_partition(&self, idx: u32, offset: u64, size: u64) -> KResult<Arc<Self>> {
-        let queue = match self.dev_type {
-            BlockDeviceType::Disk(ref disk) => disk.queue.clone(),
-            BlockDeviceType::Partition(_) => return Err(EINVAL),
+        let queue = match &self.dev_type {
+            BlockDeviceType::Disk { queue } => queue.clone(),
+            BlockDeviceType::Partition { .. } => return Err(EINVAL),
         };
 
         let device = Arc::new(BlockDevice {
             devid: make_device(self.devid >> 8, idx as u32),
-            size,
-            max_pages: self.max_pages,
-            dev_type: BlockDeviceType::Partition(BlockDevicePartition {
+            sector_count: size,
+            dev_type: BlockDeviceType::Partition {
                 disk_dev: self.devid,
-                offset,
+                lba_offset: offset,
                 queue,
-            }),
+            },
         });
 
         match BLOCK_DEVICE_LIST.lock().entry(device.devid()) {
@@ -145,29 +136,21 @@ impl BlockDevice {
         }
     }
 
-    pub fn partprobe(&self) -> KResult<()> {
+    pub async fn partprobe(&self) -> KResult<()> {
         match self.dev_type {
-            BlockDeviceType::Partition(_) => Err(EINVAL),
-            BlockDeviceType::Disk(_) => {
-                let mut mbr: UninitBuffer<MBR> = UninitBuffer::new();
-                self.read_some(0, &mut mbr)?.ok_or(EIO)?;
-                let mbr = mbr.assume_filled_ref()?;
-
-                if mbr.magic != [0x55, 0xaa] {
-                    return Ok(());
-                }
-
-                let entries = mbr.entries;
-
-                for (idx, entry) in entries.iter().enumerate() {
-                    if entry.part_type == 0 {
-                        continue;
-                    }
-
-                    let offset = entry.lba_start as u64;
-                    let size = entry.cnt as u64;
-
-                    self.register_partition(idx as u32 + 1, offset, size)?;
+            BlockDeviceType::Partition { .. } => Err(EINVAL),
+            BlockDeviceType::Disk { .. } => {
+                let mbr_table = MBRPartTable::from_disk(self).await?;
+
+                for (
+                    idx,
+                    Partition {
+                        lba_offset,
+                        sector_count,
+                    },
+                ) in mbr_table.partitions().enumerate()
+                {
+                    self.register_partition(idx as u32 + 1, lba_offset, sector_count)?;
                 }
 
                 Ok(())
@@ -183,19 +166,32 @@ impl BlockDevice {
     /// - `req.sector` must be within the disk size
     /// - `req.buffer` must be enough to hold the data
     ///
-    pub fn read_raw(&self, mut req: BlockDeviceRequest) -> KResult<()> {
-        // TODO: check disk size limit
-        if req.sector + req.count > self.size {
-            return Err(EINVAL);
-        }
+    pub fn commit_request(&self, mut req: BlockDeviceRequest) -> KResult<()> {
+        // Verify the request parameters.
+        match &mut req {
+            BlockDeviceRequest::Read { sector, count, .. } => {
+                if *sector + *count > self.sector_count {
+                    return Err(EINVAL);
+                }
 
-        match self.dev_type {
-            BlockDeviceType::Disk(ref disk) => disk.queue.submit(req),
-            BlockDeviceType::Partition(ref part) => {
-                req.sector += part.offset;
-                part.queue.submit(req)
+                if let BlockDeviceType::Partition { lba_offset, .. } = &self.dev_type {
+                    // Adjust the sector for partition offset.
+                    *sector += lba_offset;
+                }
+            }
+            BlockDeviceRequest::Write { sector, count, .. } => {
+                if *sector + *count > self.sector_count {
+                    return Err(EINVAL);
+                }
+
+                if let BlockDeviceType::Partition { lba_offset, .. } = &self.dev_type {
+                    // Adjust the sector for partition offset.
+                    *sector += lba_offset;
+                }
             }
         }
+
+        self.queue().submit(req)
     }
 
     /// Read some from the block device, may involve some copy and fragmentation
@@ -234,7 +230,7 @@ impl BlockDevice {
                     pages = core::slice::from_ref(page.as_ref().unwrap());
                 }
                 count => {
-                    nread = count.min(self.max_pages);
+                    nread = count.min(self.queue().max_request_pages());
 
                     let npages = (nread + 15) / 16;
                     let mut _page_vec = Vec::with_capacity(npages as usize);
@@ -246,13 +242,13 @@ impl BlockDevice {
                 }
             }
 
-            let req = BlockDeviceRequest {
+            let req = BlockDeviceRequest::Read {
                 sector: sector_start,
                 count: nread,
                 buffer: &pages,
             };
 
-            self.read_raw(req)?;
+            self.commit_request(req)?;
 
             for page in pages.iter() {
                 // SAFETY: We are the only owner of the page so no one could be mutating it.
@@ -283,8 +279,21 @@ impl BlockDevice {
     }
 }
 
-pub struct BlockDeviceRequest<'lt> {
-    pub sector: u64, // Sector to read from, in 512-byte blocks
-    pub count: u64,  // Number of sectors to read
-    pub buffer: &'lt [Page],
+pub enum BlockDeviceRequest<'lt> {
+    Read {
+        /// Sector to read from, in 512-byte blocks
+        sector: u64,
+        /// Number of sectors to read
+        count: u64,
+        /// Buffer pages to read into
+        buffer: &'lt [Page],
+    },
+    Write {
+        /// Sector to write to, in 512-byte blocks
+        sector: u64,
+        /// Number of sectors to write
+        count: u64,
+        /// Buffer pages to write from
+        buffer: &'lt [Page],
+    },
 }

+ 57 - 0
src/kernel/block/mbr.rs

@@ -0,0 +1,57 @@
+use super::{BlockDevice, PartTable, Partition};
+use crate::{
+    io::UninitBuffer,
+    kernel::constants::{EIO, ENODEV},
+    prelude::KResult,
+};
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct MBREntry {
+    attr: u8,
+    chs_start: [u8; 3],
+    part_type: u8,
+    chs_end: [u8; 3],
+    lba_start: u32,
+    cnt: u32,
+}
+
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+struct MBRData {
+    code: [u8; 446],
+    entries: [MBREntry; 4],
+    magic: [u8; 2],
+}
+
+pub struct MBRPartTable {
+    entries: [MBREntry; 4],
+}
+
+impl MBRPartTable {
+    pub async fn from_disk(disk: &BlockDevice) -> KResult<Self> {
+        let mut mbr: UninitBuffer<MBRData> = UninitBuffer::new();
+        disk.read_some(0, &mut mbr)?.ok_or(EIO)?;
+        let mbr = mbr.assume_init()?;
+
+        if mbr.magic != [0x55, 0xaa] {
+            Err(ENODEV)?;
+        }
+
+        Ok(Self {
+            entries: mbr.entries,
+        })
+    }
+}
+
+impl PartTable for MBRPartTable {
+    fn partitions(&self) -> impl Iterator<Item = Partition> + use<'_> {
+        self.entries
+            .iter()
+            .filter(|entry| entry.part_type != 0)
+            .map(|entry| Partition {
+                lba_offset: entry.lba_start as u64,
+                sector_count: entry.cnt as u64,
+            })
+    }
+}

+ 11 - 27
src/kernel/interrupt.rs

@@ -1,9 +1,8 @@
 use super::mem::handle_kernel_page_fault;
-use super::timer::timer_interrupt;
+use super::timer::{should_reschedule, timer_interrupt};
 use crate::kernel::constants::EINVAL;
 use crate::prelude::*;
 use alloc::sync::Arc;
-use eonix_hal::processor::CPU;
 use eonix_hal::traits::fault::Fault;
 use eonix_hal::traits::trap::{RawTrapContext, TrapType};
 use eonix_hal::trap::TrapContext;
@@ -11,27 +10,17 @@ use eonix_mm::address::{Addr as _, VAddr};
 use eonix_runtime::scheduler::Scheduler;
 use eonix_sync::SpinIrq as _;
 
-static IRQ_HANDLERS: Spin<[Option<Arc<dyn Fn() + Send + Sync>>; 16]> =
-    Spin::new([const { None }; 16]);
+static IRQ_HANDLERS: Spin<[Vec<Arc<dyn Fn() + Send + Sync>>; 16]> =
+    Spin::new([const { Vec::new() }; 16]);
 
 pub fn default_irq_handler(irqno: usize) {
     assert!(irqno < 16);
 
-    let handler = IRQ_HANDLERS.lock()[irqno as usize].as_ref().cloned();
-    if let Some(handler) = handler {
-        handler();
-    }
-
-    #[cfg(target_arch = "x86_64")]
     {
-        use eonix_hal::arch_exported::io::Port8;
-
-        const PIC1_COMMAND: Port8 = Port8::new(0x20);
-        const PIC2_COMMAND: Port8 = Port8::new(0xA0);
+        let handlers = IRQ_HANDLERS.lock();
 
-        PIC1_COMMAND.write(0x20); // EOI
-        if irqno >= 8 {
-            PIC2_COMMAND.write(0x20); // EOI
+        for handler in handlers[irqno].iter() {
+            handler();
         }
     }
 }
@@ -61,11 +50,11 @@ pub fn interrupt_handler(trap_ctx: &mut TrapContext) {
     match trap_ctx.trap_type() {
         TrapType::Syscall { no, .. } => unreachable!("Syscall {} in kernel space.", no),
         TrapType::Fault(fault) => default_fault_handler(fault, trap_ctx),
-        TrapType::Irq(no) => default_irq_handler(no),
-        TrapType::Timer => {
-            timer_interrupt();
+        TrapType::Irq { callback } => callback(default_irq_handler),
+        TrapType::Timer { callback } => {
+            callback(timer_interrupt);
 
-            if eonix_preempt::count() == 0 {
+            if eonix_preempt::count() == 0 && should_reschedule() {
                 // To make scheduler satisfied.
                 eonix_preempt::disable();
                 Scheduler::schedule();
@@ -82,11 +71,6 @@ where
         return Err(EINVAL);
     }
 
-    let old = IRQ_HANDLERS.lock_irq()[irqno as usize].replace(Arc::new(handler));
-    assert!(old.is_none(), "IRQ handler already registered");
+    IRQ_HANDLERS.lock_irq()[irqno as usize].push(Arc::new(handler));
     Ok(())
 }
-
-pub fn end_of_interrupt() {
-    CPU::local().as_mut().end_of_interrupt();
-}

+ 0 - 1
src/kernel/pcie/driver.rs

@@ -29,7 +29,6 @@ pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> {
     };
 
     let device = PCIE_DEVICES.lock().find(&index).clone_pointer();
-
     if let Some(device) = device {
         driver.handle_device(device)?;
     };

+ 23 - 1
src/kernel/syscall/procops.rs

@@ -1,3 +1,5 @@
+use core::time::Duration;
+
 use super::sysinfo::TimeVal;
 use super::SyscallNoReturn;
 use crate::io::Buffer;
@@ -8,9 +10,10 @@ use crate::kernel::constants::{
 use crate::kernel::mem::PageBuffer;
 use crate::kernel::task::{
     do_clone, futex_wait, futex_wake, FutexFlags, FutexOp, ProcessList, ProgramLoader,
-    SignalAction, Thread, WaitObject, WaitType,
+    SignalAction, Thread, WaitType,
 };
 use crate::kernel::task::{parse_futexop, CloneArgs};
+use crate::kernel::timer::sleep;
 use crate::kernel::user::dataflow::{CheckedUserPointer, UserString};
 use crate::kernel::user::{UserPointer, UserPointerMut};
 use crate::kernel::vfs::{self, dentry::Dentry};
@@ -45,6 +48,25 @@ bitflags! {
     }
 }
 
+#[eonix_macros::define_syscall(SYS_NANOSLEEP)]
+fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult<usize> {
+    let req = UserPointer::new(req)?.read()?;
+    let rem = if rem.is_null() {
+        None
+    } else {
+        Some(UserPointerMut::new(rem)?)
+    };
+
+    let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64);
+    Task::block_on(sleep(duration));
+
+    if let Some(rem) = rem {
+        rem.write((0, 0))?;
+    }
+
+    Ok(0)
+}
+
 #[eonix_macros::define_syscall(SYS_UMASK)]
 fn umask(mask: u32) -> KResult<u32> {
     let mut umask = thread.fs_context.umask.lock();

+ 8 - 5
src/kernel/task/thread.rs

@@ -7,7 +7,7 @@ use crate::{
         interrupt::default_irq_handler,
         syscall::{syscall_handlers, SyscallHandler},
         task::{clone::CloneArgs, CloneFlags},
-        timer::timer_interrupt,
+        timer::{should_reschedule, timer_interrupt},
         user::UserPointerMut,
         vfs::{filearray::FileArray, FsContext},
     },
@@ -383,10 +383,13 @@ impl Thread {
                     self.signal_list.raise(Signal::SIGILL);
                 }
                 TrapType::Fault(Fault::Unknown(_)) => unimplemented!("Unhandled fault"),
-                TrapType::Irq(irqno) => default_irq_handler(irqno),
-                TrapType::Timer => {
-                    timer_interrupt();
-                    yield_now().await;
+                TrapType::Irq { callback } => callback(default_irq_handler),
+                TrapType::Timer { callback } => {
+                    callback(timer_interrupt);
+
+                    if should_reschedule() {
+                        yield_now().await;
+                    }
                 }
                 TrapType::Syscall { no, args } => {
                     if let Some(retval) = self.handle_syscall(no, args) {

+ 176 - 14
src/kernel/timer.rs

@@ -1,34 +1,196 @@
-use super::interrupt::end_of_interrupt;
-use core::sync::atomic::{AtomicUsize, Ordering};
+use alloc::{collections::BinaryHeap, vec, vec::Vec};
+use core::{
+    cell::RefCell,
+    cmp::Reverse,
+    ops::Add,
+    sync::atomic::{AtomicUsize, Ordering},
+    task::{Poll, Waker},
+    time::Duration,
+};
+use eonix_hal::processor::CPU;
+use eonix_sync::{Spin, SpinIrq as _};
 
 static TICKS: AtomicUsize = AtomicUsize::new(0);
+static WAKEUP_TICK: AtomicUsize = AtomicUsize::new(usize::MAX);
+static SLEEPERS_LIST: Spin<BinaryHeap<Reverse<Sleepers>>> = Spin::new(BinaryHeap::new());
 
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
 pub struct Ticks(usize);
 
+pub struct Instant(Ticks);
+
+struct Sleepers {
+    wakeup_tick: Ticks,
+    wakers: RefCell<Vec<Waker>>,
+}
+
+impl Ord for Sleepers {
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        self.wakeup_tick.cmp(&other.wakeup_tick)
+    }
+}
+
+impl PartialOrd for Sleepers {
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Eq for Sleepers {}
+
+impl PartialEq for Sleepers {
+    fn eq(&self, other: &Self) -> bool {
+        self.wakeup_tick == other.wakeup_tick
+    }
+}
+
 impl Ticks {
-    pub fn in_secs(&self) -> usize {
-        self.0 / 100
+    pub const fn in_secs(&self) -> u64 {
+        self.0 as u64 / 1_000
+    }
+
+    pub const fn in_msecs(&self) -> u128 {
+        self.0 as u128
+    }
+
+    pub const fn in_usecs(&self) -> u128 {
+        self.0 as u128 * 1_000
+    }
+
+    pub const fn in_nsecs(&self) -> u128 {
+        self.0 as u128 * 1_000_000
+    }
+
+    pub fn now() -> Self {
+        Ticks(TICKS.load(Ordering::Acquire))
     }
 
-    #[allow(dead_code)]
-    pub fn in_msecs(&self) -> usize {
-        self.0 * 10
+    pub fn since_boot() -> Duration {
+        Duration::from_nanos(Self::now().in_nsecs() as u64)
+    }
+}
+
+impl Instant {
+    pub fn now() -> Self {
+        Instant(Ticks::now())
     }
 
-    pub fn in_usecs(&self) -> usize {
-        self.0 * 10_000
+    pub fn elapsed(&self) -> Duration {
+        Duration::from_nanos((Ticks::now().in_nsecs() - self.0.in_nsecs()) as u64)
     }
+}
+
+impl From<Ticks> for Instant {
+    fn from(ticks: Ticks) -> Self {
+        Instant(ticks)
+    }
+}
 
-    pub fn in_nsecs(&self) -> usize {
-        self.0 * 10_000_000
+impl From<Instant> for Ticks {
+    fn from(instant: Instant) -> Self {
+        instant.0
+    }
+}
+
+impl Add for Ticks {
+    type Output = Ticks;
+
+    fn add(self, other: Self) -> Self::Output {
+        Ticks(self.0 + other.0)
+    }
+}
+
+impl Add<Duration> for Instant {
+    type Output = Instant;
+
+    fn add(self, duration: Duration) -> Self::Output {
+        Instant(self.0 + Ticks(duration.as_millis() as usize))
     }
 }
 
 pub fn timer_interrupt() {
-    end_of_interrupt();
-    TICKS.fetch_add(1, Ordering::Relaxed);
+    if CPU::local().cpuid() != 0 {
+        // Only the BSP should handle the timer interrupt.
+        return;
+    }
+
+    let current_tick = TICKS.fetch_add(1, Ordering::Relaxed) + 1;
+    let wakeup_tick = WAKEUP_TICK.load(Ordering::Acquire);
+
+    if wakeup_tick <= current_tick {
+        let mut sleepers = SLEEPERS_LIST.lock_irq();
+        let Some(Reverse(sleepers_to_wakeup)) = sleepers.pop() else {
+            return;
+        };
+
+        for waker in sleepers_to_wakeup.wakers.into_inner() {
+            waker.wake();
+        }
+
+        if WAKEUP_TICK.load(Ordering::Acquire) == wakeup_tick {
+            // The wakeup tick has not changed since it was loaded above.
+            // Set the next wakeup tick to the next sleeper's wakeup time.
+            let wakeup_tick = sleepers
+                .peek()
+                .map(|sleepers| sleepers.0.wakeup_tick.0)
+                .unwrap_or(usize::MAX);
+
+            WAKEUP_TICK.store(wakeup_tick, Ordering::Release);
+        }
+    }
+}
+
+/// Returns true if the timeslice of the current task has expired and it should be rescheduled.
+pub fn should_reschedule() -> bool {
+    #[eonix_percpu::define_percpu]
+    static PREV_SCHED_TICK: usize = 0;
+
+    let prev_tick = PREV_SCHED_TICK.get();
+    let current_tick = Ticks::now().0;
+
+    if Ticks(current_tick - prev_tick).in_msecs() >= 10 {
+        PREV_SCHED_TICK.set(current_tick);
+        true
+    } else {
+        false
+    }
 }
 
 pub fn ticks() -> Ticks {
-    Ticks(TICKS.load(Ordering::Relaxed))
+    Ticks::now()
+}
+
+pub async fn sleep(duration: Duration) {
+    let wakeup_time = Instant::now() + duration;
+    let wakeup_tick = Ticks::from(wakeup_time);
+
+    core::future::poll_fn(|ctx| {
+        if Ticks::now() >= wakeup_tick {
+            return Poll::Ready(());
+        }
+
+        let mut sleepers_list = SLEEPERS_LIST.lock_irq();
+        let sleepers: Option<&Reverse<Sleepers>> = sleepers_list
+            .iter()
+            .find(|s| s.0.wakeup_tick == wakeup_tick);
+
+        match sleepers {
+            Some(Reverse(sleepers)) => {
+                sleepers.wakers.borrow_mut().push(ctx.waker().clone());
+            }
+            None => {
+                sleepers_list.push(Reverse(Sleepers {
+                    wakeup_tick,
+                    wakers: RefCell::new(vec![ctx.waker().clone()]),
+                }));
+            }
+        }
+
+        if wakeup_tick < Ticks(WAKEUP_TICK.load(Ordering::Acquire)) {
+            WAKEUP_TICK.store(wakeup_tick.0, Ordering::Release);
+        }
+
+        Poll::Pending
+    })
+    .await;
 }

+ 0 - 1
src/kernel/vfs/file.rs

@@ -283,7 +283,6 @@ impl InodeFile {
 
         let mut cursor = Task::block_on(self.cursor.lock());
 
-        // TODO!!!: use `UserBuffer`
         if self.append {
             let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?;
 

+ 1 - 0
src/lib.rs

@@ -135,6 +135,7 @@ async fn init_process(early_kstack: PRange) {
 
     #[cfg(target_arch = "riscv64")]
     {
+        driver::serial::init().unwrap();
         driver::virtio::init_virtio_devices();
         driver::e1000e::register_e1000e_driver();
         driver::ahci::register_ahci_driver();

+ 5 - 5
src/net/netdev.rs

@@ -5,7 +5,7 @@ use alloc::{
     collections::btree_map::{BTreeMap, Entry},
     sync::Arc,
 };
-use eonix_sync::{Mutex, Spin};
+use eonix_sync::Spin;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum LinkStatus {
@@ -57,16 +57,16 @@ impl Ord for dyn Netdev {
 }
 
 static NETDEVS_ID: AtomicU32 = AtomicU32::new(0);
-static NETDEVS: Spin<BTreeMap<u32, Arc<Mutex<dyn Netdev>>>> = Spin::new(BTreeMap::new());
+static NETDEVS: Spin<BTreeMap<u32, Arc<Spin<dyn Netdev>>>> = Spin::new(BTreeMap::new());
 
 pub fn alloc_id() -> u32 {
     NETDEVS_ID.fetch_add(1, Ordering::SeqCst)
 }
 
-pub fn register_netdev(netdev: impl Netdev + 'static) -> Result<Arc<Mutex<dyn Netdev>>, u32> {
+pub fn register_netdev(netdev: impl Netdev + 'static) -> Result<Arc<Spin<dyn Netdev>>, u32> {
     match NETDEVS.lock().entry(netdev.id()) {
         Entry::Vacant(entry) => {
-            let netdev = Arc::new(Mutex::new(netdev));
+            let netdev = Arc::new(Spin::new(netdev));
             entry.insert(netdev.clone());
             Ok(netdev)
         }
@@ -74,6 +74,6 @@ pub fn register_netdev(netdev: impl Netdev + 'static) -> Result<Arc<Mutex<dyn Ne
     }
 }
 
-pub fn get_netdev(id: u32) -> Option<Arc<Mutex<dyn Netdev>>> {
+pub fn get_netdev(id: u32) -> Option<Arc<Spin<dyn Netdev>>> {
     NETDEVS.lock().get(&id).map(|netdev| netdev.clone())
 }