Kaynağa Gözat

rewrite(ahci): rewrite ahci module with rust

fix: remove incorrect slab cache free assertion
change: c style list remove function
add: rust Buffer and RawBuffer for io ops
greatbridf 1 yıl önce
ebeveyn
işleme
ea629feec9

+ 0 - 1
CMakeLists.txt

@@ -58,7 +58,6 @@ set(KERNEL_MAIN_SOURCES src/dev/builtin-chardev.cc
                         src/kernel/vfs.cpp
                         src/kernel/vga.cpp
                         src/kernel/hw/acpi.cc
-                        src/kernel/hw/ahci.cc
                         src/kernel/hw/pci.cc
                         src/kernel/hw/serial.cc
                         src/kernel/hw/timer.cc

+ 16 - 1
Cargo.toml

@@ -14,4 +14,19 @@ bindgen = "0.70.1"
 
 [profile.dev]
 panic = "abort"
-opt-level = "z"
+
+[profile.dev.package."*"]
+opt-level = 2
+
+[profile.dev.package."gbos-rust-part"]
+opt-level = 1
+
+[profile.dev.build-override]
+opt-level = 0
+codegen-units = 256
+debug = false
+
+[profile.release.build-override]
+opt-level = 0
+codegen-units = 256
+debug = false

+ 0 - 22
include/kernel/vfs.hpp

@@ -20,19 +20,6 @@ constexpr dev_t make_device(uint32_t major, uint32_t minor) {
     return ((major << 8) & 0xFF00U) | (minor & 0xFFU);
 }
 
-// buf, buf_size, offset, cnt
-using blkdev_read =
-    std::function<ssize_t(char*, std::size_t, std::size_t, std::size_t)>;
-
-// buf, offset, cnt
-using blkdev_write =
-    std::function<ssize_t(const char*, std::size_t, std::size_t)>;
-
-struct blkdev_ops {
-    blkdev_read read;
-    blkdev_write write;
-};
-
 // buf, buf_size, cnt
 using chrdev_read = std::function<ssize_t(char*, std::size_t, std::size_t)>;
 
@@ -64,16 +51,7 @@ struct fs_context {
     dentry_pointer root;
 };
 
-int register_block_device(dev_t node, const blkdev_ops& ops);
 int register_char_device(dev_t node, const chrdev_ops& ops);
-
-void partprobe();
-
-ssize_t block_device_read(dev_t node, char* buf, size_t buf_size, size_t offset,
-                          size_t n);
-ssize_t block_device_write(dev_t node, const char* buf, size_t offset,
-                           size_t n);
-
 ssize_t char_device_read(dev_t node, char* buf, size_t buf_size, size_t n);
 ssize_t char_device_write(dev_t node, const char* buf, size_t n);
 

+ 0 - 4
include/types/list.hpp

@@ -29,14 +29,10 @@ template <typename ListNode>
 void list_remove(ListNode** head, ListNode* node) {
     if (node == *head) {
         assert(!node->prev);
-
         *head = node->next;
-        if (*head)
-            (*head)->prev = nullptr;
     }
     else {
         assert(node->prev);
-
         node->prev->next = node->next;
     }
 

+ 1 - 0
src/driver.rs

@@ -1 +1,2 @@
+pub mod ahci;
 pub mod e1000e;

+ 92 - 0
src/driver/ahci/command.rs

@@ -0,0 +1,92 @@
+use crate::prelude::*;
+
+use crate::kernel::mem::paging::Page;
+
+use super::bindings::EINVAL;
+
+pub trait Command {
+    fn pages(&self) -> &[Page];
+    fn lba(&self) -> u64;
+
+    // in sectors
+    fn count(&self) -> u16;
+
+    fn cmd(&self) -> u8;
+    fn write(&self) -> bool;
+}
+
+pub struct IdentifyCommand {
+    pages: [Page; 1],
+}
+
+impl IdentifyCommand {
+    pub fn new() -> Self {
+        let page = Page::alloc_one();
+        Self { pages: [page] }
+    }
+}
+
+impl Command for IdentifyCommand {
+    fn pages(&self) -> &[Page] {
+        &self.pages
+    }
+
+    fn lba(&self) -> u64 {
+        0
+    }
+
+    fn count(&self) -> u16 {
+        1
+    }
+
+    fn cmd(&self) -> u8 {
+        0xEC
+    }
+
+    fn write(&self) -> bool {
+        false
+    }
+}
+
+pub struct ReadLBACommand<'lt> {
+    pages: &'lt [Page],
+    lba: u64,
+    count: u16,
+}
+
+impl<'lt> ReadLBACommand<'lt> {
+    pub fn new(pages: &'lt [Page], lba: u64, count: u16) -> KResult<Self> {
+        if pages.len() > 248 {
+            return Err(EINVAL);
+        }
+
+        let buffer_tot_len = pages.iter().fold(0, |acc, page| acc + page.len());
+        if buffer_tot_len < count as usize * 512 {
+            return Err(EINVAL);
+        }
+
+        Ok(Self { pages, lba, count })
+    }
+}
+
+impl Command for ReadLBACommand<'_> {
+    fn pages(&self) -> &[Page] {
+        self.pages
+    }
+
+    fn lba(&self) -> u64 {
+        self.lba
+    }
+
+    fn count(&self) -> u16 {
+        self.count
+    }
+
+    fn cmd(&self) -> u8 {
+        0xC8
+    }
+
+    fn write(&self) -> bool {
+        false
+    }
+}

+ 78 - 0
src/driver/ahci/control.rs

@@ -0,0 +1,78 @@
+use crate::{
+    kernel::mem::phys::{NoCachePP, PhysPtr},
+    prelude::*,
+};
+
+use super::{vread, vwrite, GHC_IE};
+
+/// An `AdapterControl` is an HBA device Global Host Control block
+///
+/// # Access
+///
+/// All reads and writes to this struct are volatile
+///
+#[repr(C)]
+pub struct AdapterControl {
+    capabilities: u32,
+    global_host_control: u32,
+    interrupt_status: u32,
+    ports_implemented: u32,
+    version: u32,
+    command_completion_coalescing_control: u32,
+    command_completion_coalescing_ports: u32,
+    enclosure_management_location: u32,
+    enclosure_management_control: u32,
+    host_capabilities_extended: u32,
+    bios_handoff_control_status: u32,
+
+    _reserved: [u8; 116],
+    vendor: [u8; 96],
+}
+
+impl AdapterControl {
+    pub fn new<'lt>(addr: usize) -> &'lt mut Self {
+        NoCachePP::new(addr).as_mut()
+    }
+}
+
+impl AdapterControl {
+    pub fn enable_interrupts(&mut self) {
+        let ghc = vread(&self.global_host_control);
+        vwrite(&mut self.global_host_control, ghc | GHC_IE);
+    }
+
+    pub fn implemented_ports(&self) -> ImplementedPortsIter {
+        ImplementedPortsIter::new(vread(&self.ports_implemented))
+    }
+}
+
+pub struct ImplementedPortsIter {
+    ports: u32,
+    n: u32,
+}
+
+impl ImplementedPortsIter {
+    fn new(ports: u32) -> Self {
+        Self { ports, n: 0 }
+    }
+}
+
+impl Iterator for ImplementedPortsIter {
+    type Item = u32;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.n == 32 {
+            return None;
+        }
+
+        let have: bool = self.ports & 1 != 0;
+        self.ports >>= 1;
+        self.n += 1;
+
+        if have {
+            Some(self.n - 1)
+        } else {
+            self.next()
+        }
+    }
+}

+ 249 - 0
src/driver/ahci/defs.rs

@@ -0,0 +1,249 @@
+#![allow(dead_code)]
+
+use crate::kernel::mem::paging::Page;
+pub const VENDOR_INTEL: u16 = 0x8086;
+pub const DEVICE_AHCI: u16 = 0x2922;
+
+pub const PCI_REG_ABAR: usize = 0x05;
+
+pub const GHC_IE: u32 = 0x00000002;
+pub const GHC_AE: u32 = 0x80000000;
+
+pub const ATA_DEV_BSY: u32 = 0x00000008;
+pub const ATA_DEV_DRQ: u32 = 0x00000004;
+
+pub const PORT_CMD_ST: u32 = 0x00000001;
+pub const PORT_CMD_FRE: u32 = 0x00000010;
+pub const PORT_CMD_FR: u32 = 0x00004000;
+pub const PORT_CMD_CR: u32 = 0x00008000;
+
+/// A `CommandHeader` is used to send commands to the HBA device
+///
+/// # Access
+///
+/// `clear_busy_upon_ok` and `bytes_transferred` are volatile
+///
+#[repr(C)]
+pub struct CommandHeader {
+    // [0:4]: Command FIS length
+    // [5]: ATAPI
+    // [6]: Write
+    // [7]: Prefetchable
+    first: u8,
+
+    // [0]: Reset
+    // [1]: BIST
+    // [2]: Clear busy upon ok
+    // [3]: Reserved
+    // [4:7]: Port multiplier
+    second: u8,
+
+    prdt_length: u16,
+    bytes_transferred: u32,
+    command_table_base: u64,
+
+    _reserved: [u32; 4],
+}
+
+impl CommandHeader {
+    pub fn clear(&mut self) {
+        self.first = 0;
+        self.second = 0;
+        self.prdt_length = 0;
+        self.bytes_transferred = 0;
+        self.command_table_base = 0;
+        self._reserved = [0; 4];
+    }
+
+    pub fn setup(&mut self, cmdtable_base: u64, prdtlen: u16, write: bool) {
+        self.first = 0x05; // Command FIS length: 5 dwords (20 bytes)
+
+        if write {
+            self.first |= 0x40;
+        }
+
+        self.second = 0x04; // Clear busy upon ok
+
+        self.prdt_length = prdtlen;
+        self.bytes_transferred = 0;
+        self.command_table_base = cmdtable_base;
+
+        self._reserved = [0; 4];
+    }
+}
+
+pub enum FisType {
+    H2D = 0x27,
+    D2H = 0x34,
+    DMAAct = 0x39,
+    DMASetup = 0x41,
+    Data = 0x46,
+    BIST = 0x58,
+    PIOSetup = 0x5f,
+    DevBits = 0xa1,
+}
+
+/// A `FISH2D` is a Host to Device FIS
+///
+#[repr(C)]
+pub struct FISH2D {
+    fis_type: FisType,
+
+    // [0:3]: Port
+    // [4:6]: Reserved
+    // [7]: IsCommand
+    shared: u8,
+
+    command: u8,
+    feature: u8,
+
+    lba0: u8,
+    lba1: u8,
+    lba2: u8,
+    device: u8,
+
+    lba3: u8,
+    lba4: u8,
+    lba5: u8,
+    feature_high: u8,
+
+    count: u16,
+    iso_command_completion: u8,
+    control_register: u8,
+
+    _reserved: [u8; 4],
+}
+
+impl FISH2D {
+    pub fn setup(&mut self, cmd: u8, lba: u64, count: u16) {
+        self.fis_type = FisType::H2D;
+        self.shared = 0x80; // IsCommand
+        self.command = cmd;
+        self.feature = 0;
+
+        self.lba0 = (lba & 0xff) as u8;
+        self.lba1 = ((lba >> 8) & 0xff) as u8;
+        self.lba2 = ((lba >> 16) & 0xff) as u8;
+        self.device = 0x40; // LBA mode
+
+        self.lba3 = ((lba >> 24) & 0xff) as u8;
+        self.lba4 = ((lba >> 32) & 0xff) as u8;
+        self.lba5 = ((lba >> 40) & 0xff) as u8;
+        self.feature_high = 0;
+
+        self.count = count;
+        self.iso_command_completion = 0;
+        self.control_register = 0;
+
+        self._reserved = [0; 4];
+    }
+}
+
+/// A `FISD2H` is a Device to Host FIS
+///
+#[repr(C)]
+pub struct FISD2H {
+    fis_type: FisType,
+
+    /// [0:3]: Port
+    /// [4:5]: Reserved
+    /// [6]: Interrupt
+    /// [7]: Reserved
+    shared: u8,
+
+    status: u8,
+    error: u8,
+
+    lba0: u8,
+    lba1: u8,
+    lba2: u8,
+    device: u8,
+
+    lba3: u8,
+    lba4: u8,
+    lba5: u8,
+    _reserved1: u8,
+
+    count: u16,
+
+    _reserved2: [u8; 6],
+}
+
+/// A `FISPIOSetup` is a PIO Setup FIS
+///
+#[repr(C)]
+pub struct FISPIOSetup {
+    fis_type: FisType,
+
+    /// [0:3]: Port
+    /// [4]: Reserved
+    /// [5]: Data transfer direction
+    /// [6]: Interrupt
+    /// [7]: Reserved
+    shared: u8,
+
+    status: u8,
+    error: u8,
+
+    lba0: u8,
+    lba1: u8,
+    lba2: u8,
+    device: u8,
+
+    lba3: u8,
+    lba4: u8,
+    lba5: u8,
+    _reserved1: u8,
+
+    count: u16,
+    _reserved2: u8,
+    new_status: u8,
+
+    transfer_count: u16,
+    _reserved3: [u8; 2],
+}
+
+/// A `ReceivedFis` is a FIS that the HBA device has received
+///
+#[repr(C)]
+pub struct ReceivedFis {
+    fis_dma_setup: [u8; 32], // we don't care about it for now
+
+    fis_pio: FISPIOSetup,
+    padding1: [u8; 12],
+
+    fis_reg: FISD2H,
+    padding2: [u8; 4],
+
+    fissdb: [u8; 8],
+
+    ufis: [u8; 64],
+
+    reserved: [u8; 96],
+}
+
+/// A `PRDTEntry` is a Physical Region Descriptor Table entry
+///
+#[repr(C)]
+pub struct PRDTEntry {
+    base: u64,
+    _reserved1: u32,
+
+    /// [0:21]: Byte count
+    /// [22:30]: Reserved
+    /// [31]: Interrupt on completion
+    shared: u32,
+}
+
+impl PRDTEntry {
+    pub fn setup(&mut self, page: &Page) {
+        self.base = page.as_phys() as u64;
+        self._reserved1 = 0;
+
+        self.shared = 0x80000000 | (page.len() as u32 & 0x3fffff);
+    }
+}
+
+// A CommandTable consists of a Command FIS, ATAPI Command, and PRDT entries
+// The Command FIS is always at the beginning of the table
+// The PRDT entries follow 44 + 16 + 48 = 108 bytes after the 20-byte Command FIS, i.e. at offset 0x80

+ 137 - 0
src/driver/ahci/mod.rs

@@ -0,0 +1,137 @@
+use crate::{
+    kernel::block::{make_device, BlockDevice},
+    prelude::*,
+};
+
+use alloc::sync::Arc;
+use bindings::{
+    kernel::hw::pci::{self, pci_device},
+    EIO,
+};
+use control::AdapterControl;
+use defs::*;
+use port::AdapterPort;
+
+mod command;
+mod control;
+mod defs;
+mod port;
+
+fn vread<T: Sized + Copy>(refval: &T) -> T {
+    unsafe { core::ptr::read_volatile(refval) }
+}
+
+fn vwrite<T: Sized + Copy>(refval: &mut T, val: T) {
+    unsafe { core::ptr::write_volatile(refval, val) }
+}
+
+fn spinwait_clear(refval: &u32, mask: u32) -> KResult<()> {
+    const SPINWAIT_MAX: usize = 1000;
+
+    let mut spins = 0;
+    while vread(refval) & mask != 0 {
+        if spins == SPINWAIT_MAX {
+            return Err(EIO);
+        }
+
+        spins += 1;
+    }
+
+    Ok(())
+}
+
+fn spinwait_set(refval: &u32, mask: u32) -> KResult<()> {
+    const SPINWAIT_MAX: usize = 1000;
+
+    let mut spins = 0;
+    while vread(refval) & mask != mask {
+        if spins == SPINWAIT_MAX {
+            return Err(EIO);
+        }
+
+        spins += 1;
+    }
+
+    Ok(())
+}
+
+struct Device<'lt, 'port> {
+    control_base: usize,
+    control: &'lt mut AdapterControl,
+    // TODO: impl Drop to free pci device
+    pcidev: *mut pci_device,
+    ports: Vec<Option<Arc<Mutex<AdapterPort<'port>>>>>,
+}
+
+impl<'lt, 'port: 'static> Device<'lt, 'port> {
+    fn probe_ports(&mut self) -> KResult<()> {
+        for nport in self.control.implemented_ports() {
+            let mut port = AdapterPort::<'port>::new(self.control_base, nport);
+
+            if !port.status_ok() {
+                continue;
+            }
+
+            port.init()?;
+
+            let port = Arc::new(Mutex::new(port));
+
+            self.ports[nport as usize] = Some(port.clone());
+
+            let port = BlockDevice::register_disk(
+                make_device(8, nport * 16),
+                2147483647, // TODO: get size from device
+                port,
+            )?;
+
+            port.partprobe()?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<'lt: 'static, 'port: 'static> Device<'lt, 'port> {
+    pub fn new(pcidev: *mut pci_device) -> KResult<Self> {
+        let base = unsafe { *(*pcidev).header_type0() }.bars[PCI_REG_ABAR];
+
+        // use MMIO
+        if base & 0xf != 0 {
+            return Err(EIO);
+        }
+
+        let mut ports = Vec::with_capacity(32);
+        ports.resize_with(32, || None);
+
+        let mut device = Device {
+            control_base: base as usize,
+            control: AdapterControl::new(base as usize),
+            pcidev,
+            ports,
+        };
+
+        device.control.enable_interrupts();
+        device.probe_ports()?;
+
+        Ok(device)
+    }
+}
+
+unsafe extern "C" fn probe_device(pcidev: *mut pci_device) -> i32 {
+    match Device::new(pcidev) {
+        Ok(device) => {
+            // TODO!!!: save device to pci_device
+            Box::leak(Box::new(device));
+            0
+        },
+        Err(e) => -(e as i32),
+    }
+}
+
+pub fn register_ahci_driver() {
+    let ret = unsafe {
+        pci::register_driver_r(VENDOR_INTEL, DEVICE_AHCI, Some(probe_device))
+    };
+
+    assert_eq!(ret, 0);
+}

+ 186 - 0
src/driver/ahci/port.rs

@@ -0,0 +1,186 @@
+use bindings::EINVAL;
+
+use crate::prelude::*;
+
+use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue};
+use crate::kernel::mem::paging::Page;
+
+use crate::kernel::mem::phys::{NoCachePP, PhysPtr};
+
+use super::command::{Command, IdentifyCommand, ReadLBACommand};
+use super::{
+    spinwait_clear, vread, vwrite, CommandHeader, PRDTEntry, ReceivedFis,
+    ATA_DEV_BSY, ATA_DEV_DRQ, FISH2D, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE,
+    PORT_CMD_ST,
+};
+
+/// An `AdapterPortData` is the memory-mapped register block of one HBA port in AHCI mode.
+///
+/// # Access
+///
+/// All reads and writes to this struct are volatile
+///
+#[repr(C)]
+pub struct AdapterPortData {
+    pub command_list_base: u64,
+    pub fis_base: u64,
+
+    pub interrupt_status: u32,
+    pub interrupt_enable: u32,
+
+    pub command_status: u32,
+
+    _reserved2: u32,
+
+    pub task_file_data: u32,
+    pub signature: u32,
+
+    pub sata_status: u32,
+    pub sata_control: u32,
+    pub sata_error: u32,
+    pub sata_active: u32,
+
+    pub command_issue: u32,
+    pub sata_notification: u32,
+
+    pub fis_based_switch_control: u32,
+
+    _reserved1: [u32; 11],
+    vendor: [u32; 4],
+}
+
+pub struct AdapterPort<'lt> {
+    nport: u32,
+    data: &'lt mut AdapterPortData,
+    page: Page,
+    cmdheaders: &'lt mut [CommandHeader; 32],
+    recv_fis: &'lt mut ReceivedFis,
+}
+
+impl<'lt> AdapterPort<'lt> {
+    pub fn new(base: usize, nport: u32) -> Self {
+        let page = Page::alloc_one();
+        Self {
+            nport,
+            data: NoCachePP::new(base + 0x100 + 0x80 * nport as usize).as_mut(),
+            cmdheaders: page.as_cached().as_mut(),
+            recv_fis: page.as_cached().offset(0x400).as_mut(),
+            page,
+        }
+    }
+}
+
+impl<'lt> AdapterPort<'lt> {
+    pub fn status_ok(&self) -> bool {
+        self.data.sata_status & 0xf == 0x3
+    }
+
+    fn stop_command(&mut self) -> KResult<()> {
+        let cmd_status = vread(&self.data.command_status);
+        vwrite(
+            &mut self.data.command_status,
+            cmd_status & !(PORT_CMD_ST | PORT_CMD_FRE),
+        );
+
+        spinwait_clear(&self.data.command_status, PORT_CMD_CR | PORT_CMD_FR)
+    }
+
+    fn start_command(&mut self) -> KResult<()> {
+        spinwait_clear(&self.data.command_status, PORT_CMD_CR)?;
+
+        let cmd_status = vread(&self.data.command_status);
+        vwrite(
+            &mut self.data.command_status,
+            cmd_status | PORT_CMD_ST | PORT_CMD_FRE,
+        );
+
+        Ok(())
+    }
+
+    fn send_command(&mut self, cmd: &impl Command) -> KResult<()> {
+        let pages = cmd.pages();
+
+        // TODO: get an available command slot
+        let cmdslot = 0;
+
+        let cmdtable_page = Page::alloc_one();
+        self.cmdheaders[cmdslot].clear();
+        self.cmdheaders[cmdslot].setup(
+            cmdtable_page.as_phys() as u64,
+            pages.len() as u16,
+            cmd.write(),
+        );
+
+        let command_fis: &mut FISH2D = cmdtable_page.as_cached().as_mut();
+        command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count());
+
+        let prdt: &mut [PRDTEntry; 248] =
+            cmdtable_page.as_cached().offset(0x80).as_mut();
+
+        for (idx, page) in pages.iter().enumerate() {
+            prdt[idx].setup(page);
+        }
+
+        // clear received fis?
+
+        // wait until port is not busy
+        spinwait_clear(&self.data.task_file_data, ATA_DEV_BSY | ATA_DEV_DRQ)?;
+
+        vwrite(&mut self.data.command_issue, 1 << cmdslot);
+        spinwait_clear(&self.data.command_issue, 1 << cmdslot)?;
+
+        // TODO: check and wait interrupt
+
+        Ok(())
+    }
+
+    fn identify(&mut self) -> KResult<()> {
+        let cmd = IdentifyCommand::new();
+
+        // TODO: check returned data
+        self.send_command(&cmd)?;
+
+        Ok(())
+    }
+
+    pub fn init(&mut self) -> KResult<()> {
+        self.stop_command()?;
+
+        // TODO: use interrupt
+        // this is the PxIE register, setting bits here will make
+        //      it generate corresponding interrupts in PxIS
+        //
+        // port->interrupt_enable = 1;
+
+        vwrite(&mut self.data.command_list_base, self.page.as_phys() as u64);
+        vwrite(&mut self.data.fis_base, self.page.as_phys() as u64 + 0x400);
+
+        self.start_command()?;
+
+        match self.identify() {
+            Err(err) => {
+                self.stop_command()?;
+                return Err(err);
+            }
+            Ok(_) => Ok(()),
+        }
+    }
+}
+
+impl<'lt> BlockRequestQueue for AdapterPort<'lt> {
+    fn max_request_pages(&self) -> u64 {
+        1024
+    }
+
+    fn submit(&mut self, req: BlockDeviceRequest) -> KResult<()> {
+        // TODO: check disk size limit using newtype
+        if req.count > 65535 {
+            return Err(EINVAL);
+        }
+
+        let command =
+            ReadLBACommand::new(req.buffer, req.sector, req.count as u16)?;
+
+        self.send_command(&command)
+    }
+}

+ 191 - 177
src/fs/fat32.rs

@@ -5,7 +5,7 @@ use alloc::{
 use bindings::{EINVAL, EIO, S_IFDIR, S_IFREG};
 
 use crate::{
-    io::copy_offset_count,
+    io::{RawBuffer, UninitBuffer},
     kernel::{
         block::{make_device, BlockDevice, BlockDeviceRequest},
         mem::{paging::Page, phys::PhysPtr},
@@ -20,27 +20,6 @@ use crate::{
     KResult,
 };
 
-const EOC: ClusterNo = 0x0FFFFFF8;
-
-/// Convert a mutable reference to a slice of bytes
-/// This is a safe wrapper around `core::slice::from_raw_parts_mut`
-///
-fn as_slice<T>(object: &mut [T]) -> &mut [u8] {
-    unsafe {
-        core::slice::from_raw_parts_mut(
-            object.as_mut_ptr() as *mut u8,
-            object.len() * core::mem::size_of::<T>(),
-        )
-    }
-}
-
-/// Convert a slice of bytes to a mutable reference
-///
-fn as_object<T>(slice: &[u8]) -> &T {
-    assert_eq!(slice.len(), core::mem::size_of::<T>());
-    unsafe { &*(slice.as_ptr() as *const T) }
-}
-
 type ClusterNo = u32;
 
 const ATTR_RO: u8 = 0x01;
@@ -69,6 +48,63 @@ struct FatDirectoryEntry {
     size: u32,
 }
 
+impl FatDirectoryEntry {
+    pub fn filename(&self) -> KResult<String> {
+        let basename = str::from_utf8(&self.name)
+            .map_err(|_| EINVAL)?
+            .trim_end_matches(char::from(' '));
+
+        let extension = if self.extension[0] != ' ' as u8 {
+            Some(
+                str::from_utf8(&self.extension)
+                    .map_err(|_| EINVAL)?
+                    .trim_end_matches(char::from(' ')),
+            )
+        } else {
+            None
+        };
+
+        let mut name = String::from(basename);
+
+        if let Some(extension) = extension {
+            name.push('.');
+            name += extension;
+        }
+
+        if self.reserved & RESERVED_FILENAME_LOWERCASE != 0 {
+            name.make_ascii_lowercase();
+        }
+
+        Ok(name)
+    }
+
+    pub fn ino(&self) -> Ino {
+        let cluster_high = (self.cluster_high as u32) << 16;
+        (self.cluster_low as u32 | cluster_high) as Ino
+    }
+
+    fn is_volume_id(&self) -> bool {
+        self.attr & ATTR_VOLUME_ID != 0
+    }
+
+    fn is_free(&self) -> bool {
+        self.name[0] == 0x00
+    }
+
+    fn is_deleted(&self) -> bool {
+        self.name[0] == 0xE5
+    }
+
+    fn is_invalid(&self) -> bool {
+        self.is_volume_id() || self.is_free() || self.is_deleted()
+    }
+
+    fn is_directory(&self) -> bool {
+        self.attr & ATTR_DIRECTORY != 0
+    }
+}
+
+#[derive(Clone, Copy)]
 #[repr(C, packed)]
 struct Bootsector {
     jmp: [u8; 3],
@@ -108,7 +144,7 @@ struct Bootsector {
 /// 3. Inodes
 ///
 struct FatFs {
-    device: BlockDevice,
+    device: Arc<BlockDevice>,
     icache: Mutex<InodeCache<FatFs>>,
     sectors_per_cluster: u8,
     rootdir_cluster: ClusterNo,
@@ -118,31 +154,16 @@ struct FatFs {
 }
 
 impl FatFs {
-    // /// Read a sector
-    // fn read_sector(&self, sector: u64, buf: &mut [u8]) -> KResult<()> {
-    //     assert_eq!(buf.len(), 512);
-    //     let mut rq = BlockDeviceRequest {
-    //         sector,
-    //         count: 1,
-    //         buffer: Page::alloc_one(),
-    //     };
-    //     self.read(&mut rq)?;
-
-    //     buf.copy_from_slice(rq.buffer.as_cached().as_slice(512));
-
-    //     Ok(())
-    // }
-
-    fn read_cluster(&self, cluster: ClusterNo, buf: &mut [u8]) -> KResult<()> {
+    fn read_cluster(&self, cluster: ClusterNo, buf: &Page) -> KResult<()> {
         let cluster = cluster - 2;
 
-        let mut rq = BlockDeviceRequest {
+        let rq = BlockDeviceRequest {
             sector: self.data_start as u64
                 + cluster as u64 * self.sectors_per_cluster as u64,
             count: self.sectors_per_cluster as u64,
-            buffer: buf,
+            buffer: core::slice::from_ref(buf),
         };
-        self.device.read(&mut rq)?;
+        self.device.read_raw(rq)?;
 
         Ok(())
     }
@@ -153,7 +174,7 @@ impl FatFs {
         device: DevId,
     ) -> KResult<(Arc<Mutex<Self>>, Arc<dyn Inode>)> {
         let mut fatfs = Self {
-            device: BlockDevice::new(device),
+            device: BlockDevice::get(device)?,
             icache: Mutex::new(InodeCache::new()),
             sectors_per_cluster: 0,
             rootdir_cluster: 0,
@@ -162,19 +183,9 @@ impl FatFs {
             volume_label: String::new(),
         };
 
-        let mut info = [0u8; 512];
-
-        let info = {
-            let mut rq = BlockDeviceRequest {
-                sector: 0,
-                count: 1,
-                // buffer: Page::alloc_one(),
-                buffer: &mut info,
-            };
-            fatfs.device.read(&mut rq)?;
-
-            as_object::<Bootsector>(&info)
-        };
+        let mut info: UninitBuffer<Bootsector> = UninitBuffer::new();
+        fatfs.device.read_some(0, &mut info)?.ok_or(EIO)?;
+        let info = info.assume_filled_ref()?;
 
         fatfs.sectors_per_cluster = info.sectors_per_cluster;
         fatfs.rootdir_cluster = info.root_cluster;
@@ -189,17 +200,14 @@ impl FatFs {
                 0,
             );
 
-            let mut rq = BlockDeviceRequest {
-                sector: info.reserved_sectors as u64,
-                count: info.sectors_per_fat as u64,
-                buffer: unsafe {
-                    core::slice::from_raw_parts_mut(
-                        fat.as_mut_ptr() as *mut _,
-                        fat.len() * core::mem::size_of::<ClusterNo>(),
-                    )
-                },
-            };
-            fatfs.device.read(&mut rq)?;
+            let mut buffer = RawBuffer::new_from_slice(fat.as_mut_slice());
+
+            fatfs
+                .device
+                .read_some(info.reserved_sectors as usize * 512, &mut buffer)?
+                .ok_or(EIO)?;
+
+            assert!(buffer.filled());
         }
 
         fatfs.volume_label = String::from(
@@ -210,17 +218,8 @@ impl FatFs {
 
         let root_dir_cluster_count = {
             let fat = fatfs.fat.lock();
-            let mut next = fatfs.rootdir_cluster;
-            let mut count = 1;
-            loop {
-                next = fat[next as usize];
-                if next >= EOC {
-                    break;
-                }
-                count += 1;
-            }
 
-            count
+            ClusterIterator::new(&fat, fatfs.rootdir_cluster).count()
         };
 
         let fatfs = Arc::new(Mutex::new(fatfs));
@@ -272,6 +271,33 @@ struct FatInode {
     vfs: Weak<Mutex<FatFs>>,
 }
 
+struct ClusterIterator<'lt> {
+    fat: &'lt [ClusterNo],
+    cur: ClusterNo,
+}
+
+impl<'lt> ClusterIterator<'lt> {
+    fn new(fat: &'lt [ClusterNo], start: ClusterNo) -> Self {
+        Self { fat, cur: start }
+    }
+}
+
+impl<'lt> Iterator for ClusterIterator<'lt> {
+    type Item = ClusterNo;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        const EOC: ClusterNo = 0x0FFFFFF8;
+        let next = self.cur;
+
+        if next >= EOC {
+            None
+        } else {
+            self.cur = self.fat[next as usize];
+            Some(next)
+        }
+    }
+}
+
 impl Inode for FatInode {
     fn idata(&self) -> &Mutex<InodeData> {
         &self.idata
@@ -281,57 +307,60 @@ impl Inode for FatInode {
         self
     }
 
-    fn read(
-        &self,
-        mut buffer: &mut [u8],
-        mut count: usize,
-        mut offset: usize,
-    ) -> KResult<usize> {
+    fn read(&self, buffer: &mut [u8], offset: usize) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let vfs = vfs.lock();
         let fat = vfs.fat.lock();
 
         let cluster_size = vfs.sectors_per_cluster as usize * 512;
-        let mut cno = {
-            let idata = self.idata.lock();
-            idata.ino as ClusterNo
-        };
 
-        while offset >= cluster_size {
-            cno = fat[cno as usize];
-            offset -= cluster_size;
+        let buffer_len = buffer.len();
+        let skip_count = offset / cluster_size;
+        let inner_offset = offset % cluster_size;
+        let cluster_count =
+            (inner_offset + buffer.len() + cluster_size - 1) / cluster_size;
 
-            if cno >= EOC {
-                return Ok(0);
-            }
-        }
+        let mut cluster_iter =
+            ClusterIterator::new(&fat, self.idata.lock().ino as ClusterNo)
+                .skip(skip_count)
+                .take(cluster_count);
 
         let page_buffer = Page::alloc_one();
-        let page_buffer = page_buffer
-            .as_cached()
-            .as_mut_slice::<u8>(page_buffer.len());
-
-        let orig_count = count;
-        while count != 0 {
-            vfs.read_cluster(cno, page_buffer)?;
-
-            let ncopied = copy_offset_count(page_buffer, buffer, offset, count);
-            offset = 0;
 
-            if ncopied == 0 {
-                break;
+        let mut nread = 0;
+        if let Some(cluster) = cluster_iter.next() {
+            vfs.read_cluster(cluster, &page_buffer)?;
+
+            let (_, data) = page_buffer
+                .as_cached()
+                .as_slice::<u8>(page_buffer.len())
+                .split_at(inner_offset);
+
+            if data.len() > buffer_len - nread {
+                buffer[nread..].copy_from_slice(&data[..buffer_len - nread]);
+                return Ok(buffer_len);
+            } else {
+                buffer[nread..nread + data.len()].copy_from_slice(data);
+                nread += data.len();
             }
+        }
 
-            count -= ncopied;
-            buffer = &mut buffer[ncopied..];
+        for cluster in cluster_iter {
+            vfs.read_cluster(cluster, &page_buffer)?;
 
-            cno = fat[cno as usize];
-            if cno >= EOC {
-                break;
+            let data =
+                page_buffer.as_cached().as_slice::<u8>(page_buffer.len());
+
+            if data.len() > buffer_len - nread {
+                buffer[nread..].copy_from_slice(&data[..buffer_len - nread]);
+                return Ok(buffer_len);
+            } else {
+                buffer[nread..nread + data.len()].copy_from_slice(data);
+                nread += data.len();
             }
         }
 
-        Ok(orig_count - count)
+        Ok(nread)
     }
 
     fn readdir(
@@ -344,62 +373,44 @@ impl Inode for FatInode {
 
         let fat = vfs.fat.lock();
 
-        let idata = self.idata.lock();
-        let mut next = idata.ino as ClusterNo;
+        let cluster_size = vfs.sectors_per_cluster as usize * 512;
+        let skip_count = offset / cluster_size;
+        let inner_offset = offset % cluster_size;
 
-        let skip = offset / 512 / vfs.sectors_per_cluster as usize;
-        let mut offset = offset % (512 * vfs.sectors_per_cluster as usize);
-        for _ in 0..skip {
-            if next >= EOC {
-                return Ok(0);
-            }
-            next = fat[next as usize];
-        }
-        if next >= EOC {
-            return Ok(0);
-        }
+        let cluster_iter =
+            ClusterIterator::new(&fat, self.idata.lock().ino as ClusterNo)
+                .skip(skip_count)
+                .enumerate();
 
         let mut nread = 0;
         let buffer = Page::alloc_one();
-        let buffer = buffer.as_cached().as_mut_slice::<FatDirectoryEntry>(
-            vfs.sectors_per_cluster as usize * 512
-                / core::mem::size_of::<FatDirectoryEntry>(),
-        );
-        loop {
-            vfs.read_cluster(next, as_slice(buffer))?;
-            let start = offset / core::mem::size_of::<FatDirectoryEntry>();
-            let end = vfs.sectors_per_cluster as usize * 512
-                / core::mem::size_of::<FatDirectoryEntry>();
-            offset = 0;
-
-            for entry in buffer.iter().skip(start).take(end - start) {
-                if entry.attr & ATTR_VOLUME_ID != 0 {
-                    nread += core::mem::size_of::<FatDirectoryEntry>();
+        for (idx, cluster) in cluster_iter {
+            vfs.read_cluster(cluster, &buffer)?;
+
+            const ENTRY_SIZE: usize = core::mem::size_of::<FatDirectoryEntry>();
+            let count = cluster_size / ENTRY_SIZE;
+
+            let entries = {
+                let entries = buffer
+                    .as_cached()
+                    .as_slice::<FatDirectoryEntry>(count)
+                    .iter();
+
+                entries.skip(if idx == 0 {
+                    inner_offset / ENTRY_SIZE
+                } else {
+                    0
+                })
+            };
+
+            for entry in entries {
+                if entry.is_invalid() {
+                    nread += ENTRY_SIZE;
                     continue;
                 }
 
-                let cluster_high = (entry.cluster_high as u32) << 16;
-                let ino = (entry.cluster_low as u32 | cluster_high) as Ino;
-
-                let name = {
-                    let mut name = String::new();
-                    name += str::from_utf8(&entry.name)
-                        .map_err(|_| EINVAL)?
-                        .trim_end_matches(char::from(' '));
-
-                    if entry.extension[0] != ' ' as u8 {
-                        name.push('.');
-                    }
-
-                    name += str::from_utf8(&entry.extension)
-                        .map_err(|_| EINVAL)?
-                        .trim_end_matches(char::from(' '));
-
-                    if entry.reserved & RESERVED_FILENAME_LOWERCASE != 0 {
-                        name.make_ascii_lowercase();
-                    }
-                    name
-                };
+                let ino = entry.ino();
+                let name = entry.filename()?;
 
                 let inode = {
                     let mut icache = vfs.icache.lock();
@@ -407,21 +418,26 @@ impl Inode for FatInode {
                     match icache.get(ino) {
                         Some(inode) => inode,
                         None => {
-                            let is_directory = entry.attr & ATTR_DIRECTORY != 0;
+                            let nlink;
+                            let mut mode = 0o777;
+
+                            if entry.is_directory() {
+                                nlink = 2;
+                                mode |= S_IFDIR;
+                            } else {
+                                nlink = 1;
+                                mode |= S_IFREG;
+                            }
+
                             let inode = Arc::new(FatInode {
                                 idata: Mutex::new(InodeData {
                                     ino,
-                                    mode: 0o777
-                                        | if is_directory {
-                                            S_IFDIR
-                                        } else {
-                                            S_IFREG
-                                        },
-                                    nlink: if is_directory { 2 } else { 1 },
+                                    mode,
+                                    nlink,
                                     size: entry.size as u64,
-                                    atime: TimeSpec { sec: 0, nsec: 0 },
-                                    mtime: TimeSpec { sec: 0, nsec: 0 },
-                                    ctime: TimeSpec { sec: 0, nsec: 0 },
+                                    atime: TimeSpec::default(),
+                                    mtime: TimeSpec::default(),
+                                    ctime: TimeSpec::default(),
                                     uid: 0,
                                     gid: 0,
                                 }),
@@ -439,13 +455,11 @@ impl Inode for FatInode {
                     return Ok(nread);
                 }
 
-                nread += core::mem::size_of::<FatDirectoryEntry>();
-            }
-            next = fat[next as usize];
-            if next >= EOC {
-                return Ok(nread);
+                nread += ENTRY_SIZE;
             }
         }
+
+        Ok(nread)
     }
 
     fn vfs_weak(&self) -> Weak<Mutex<dyn Vfs>> {

+ 2 - 7
src/fs/procfs.rs

@@ -102,12 +102,7 @@ impl Inode for ProcFsNode {
         }
     }
 
-    fn read(
-        &self,
-        buffer: &mut [u8],
-        count: usize,
-        offset: usize,
-    ) -> KResult<usize> {
+    fn read(&self, buffer: &mut [u8], offset: usize) -> KResult<usize> {
         match self.data {
             ProcFsData::File(ref file) => {
                 if !file.can_read() {
@@ -123,7 +118,7 @@ impl Inode for ProcFsNode {
                     Some((data, _)) => data,
                 };
 
-                Ok(copy_offset_count(data, buffer, offset, count))
+                Ok(copy_offset_count(data, buffer, offset, buffer.len()))
             }
             _ => Err(EISDIR),
         }

+ 7 - 9
src/fs/tmpfs.rs

@@ -140,18 +140,16 @@ impl Inode for TmpFsInode {
         }
     }
 
-    fn read(
-        &self,
-        buffer: &mut [u8],
-        count: usize,
-        offset: usize,
-    ) -> KResult<usize> {
+    fn read(&self, buffer: &mut [u8], offset: usize) -> KResult<usize> {
         self.vfs()?;
 
         match *self.fsdata.lock() {
-            TmpFsData::File(ref file) => {
-                Ok(copy_offset_count(file, buffer, offset as usize, count))
-            }
+            TmpFsData::File(ref file) => Ok(copy_offset_count(
+                file,
+                buffer,
+                offset as usize,
+                buffer.len(),
+            )),
 
             _ => Err(EINVAL),
         }

+ 179 - 25
src/io.rs

@@ -1,46 +1,200 @@
 use bindings::EFAULT;
 
 use crate::prelude::*;
-use core::ffi::{c_char, c_size_t, c_uchar};
 
-pub struct Buffer {
-    buf: *mut c_uchar,
-    size: usize,
-    rem: usize,
+use core::{ffi::c_char, fmt::Write, mem::MaybeUninit, pin::Pin};
+
+/// Outcome of a [`Buffer::fill`]-style operation.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum FillResult {
+    /// All offered bytes were stored; the payload is the byte count.
+    Done(usize),
+    /// Only the first `n` offered bytes fit; the payload is `n`.
+    Partial(usize),
+    /// The buffer had no free space left, nothing was stored.
+    Full,
+}
+
+impl FillResult {
+    /// Maps `Done` to `Ok(())` and every other outcome to `Err(err)`.
+    pub fn ok_or(self, err: u32) -> KResult<()> {
+        // Spelled out without a catch-all so that a new variant forces a
+        // decision here.
+        match self {
+            FillResult::Done(_) => Ok(()),
+            FillResult::Partial(_) | FillResult::Full => Err(err),
+        }
+    }
+
+    /// Number of bytes stored, accepting a partial fill (`Full` counts as 0).
+    pub fn allow_partial(self) -> usize {
+        match self {
+            FillResult::Done(n) | FillResult::Partial(n) => n,
+            FillResult::Full => 0,
+        }
+    }
 }
 
-impl Buffer {
-    pub fn new(buf: *mut c_uchar, _size: c_size_t) -> Self {
-        let size = _size as usize;
+pub trait Buffer { // Destination that read paths copy bytes into.
+    fn total(&self) -> usize; // Total capacity in bytes (not the remaining space).
+    fn fill(&mut self, data: &[u8]) -> KResult<FillResult>; // Append `data`; the result tells whether all, part or none of it was stored.
+}
+
+/// Heap-allocated, initially uninitialized `T` that is written bytewise through
+/// the [`Buffer`] interface and can be borrowed once it is completely filled.
+pub struct UninitBuffer<'lt, T: Copy + Sized> {
+    data: Box<MaybeUninit<T>>,
+    buffer: RawBuffer<'lt>,
+}
+
+impl<'lt, T: Copy + Sized> UninitBuffer<'lt, T> {
+    /// Allocates storage for one `T` with the write cursor at its first byte.
+    pub fn new() -> Self {
+        let mut data: Box<MaybeUninit<T>> = Box::new(MaybeUninit::uninit());
+
+        // SAFETY: the pointer comes from a live allocation of exactly
+        // `size_of::<T>()` bytes; moving the `Box` into `Self` below does not
+        // move the allocation itself.
+        let bytes = unsafe {
+            core::slice::from_raw_parts_mut(
+                data.as_mut_ptr() as *mut u8,
+                core::mem::size_of::<T>(),
+            )
+        };
+        let buffer = RawBuffer::new_from_slice(bytes);
+
         Self {
-            buf,
-            size,
-            rem: size,
+            data,
+            buffer,
         }
     }
 
-    pub fn count(&self) -> usize {
-        self.size - self.rem
+    /// Borrows the value, or fails with `EFAULT` while bytes are still missing.
+    pub fn assume_filled_ref(&self) -> KResult<&T> {
+        if self.buffer.filled() {
+            // SAFETY: every byte of the allocation has been written via `fill`.
+            // NOTE(review): this also relies on any byte pattern being a valid
+            // `T`, which `T: Copy` alone does not guarantee.
+            Ok(unsafe { self.data.assume_init_ref() })
+        } else {
+            Err(EFAULT)
+        }
     }
 }
 
-use core::fmt::Write;
-impl Write for Buffer {
-    fn write_str(&mut self, s: &str) -> core::fmt::Result {
-        let s = s.as_bytes();
-        let len = s.len();
+impl<'lt, T: Copy + Sized> Buffer for UninitBuffer<'lt, T> {
+    fn total(&self) -> usize {
+        RawBuffer::total(&self.buffer)
+    }
+
+    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
+        RawBuffer::fill(&mut self.buffer, data)
+    }
+}
+
+/// Write cursor over a raw byte region that is borrowed for `'lt`.
+pub struct RawBuffer<'lt> {
+    buf: *mut u8,
+    tot: usize,
+    cur: usize,
+    _phantom: core::marker::PhantomData<&'lt mut u8>,
+}
+
+impl<'lt> RawBuffer<'lt> {
+    /// Views the memory of a single `T` as a byte buffer.
+    pub fn new_from_mut<T: Copy + Sized>(buf: &'lt mut T) -> Self {
+        let start: *mut T = buf;
+
+        Self {
+            buf: start.cast::<u8>(),
+            tot: core::mem::size_of::<T>(),
+            cur: 0,
+            _phantom: core::marker::PhantomData,
+        }
+    }
+
+    /// Views the memory of a slice of `T` as a byte buffer.
+    pub fn new_from_slice<T: Copy + Sized>(buf: &'lt mut [T]) -> Self {
+        let tot = core::mem::size_of_val(&*buf);
+
+        Self {
+            buf: buf.as_mut_ptr().cast::<u8>(),
+            tot,
+            cur: 0,
+            _phantom: core::marker::PhantomData,
+        }
+    }
+
+    /// Bytes written so far.
+    pub fn count(&self) -> usize {
+        self.cur
+    }
+
+    /// Capacity of the region in bytes.
+    pub fn total(&self) -> usize {
+        self.tot
+    }
+
+    /// Bytes that can still be written.
+    pub fn available(&self) -> usize {
+        self.tot - self.cur
+    }
 
-        if self.rem <= len {
-            return Err(core::fmt::Error);
+    /// Whether the whole region has been written.
+    pub fn filled(&self) -> bool {
+        self.cur == self.tot
+    }
+
+    /// Copies as much of `data` as fits behind the cursor and advances it.
+    ///
+    /// Returns `Full` if there was no room at all, `Partial(n)` if only the
+    /// first `n` bytes fit, and `Done(data.len())` otherwise.
+    pub fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
+        let room = self.available();
+        if room == 0 {
+            return Ok(FillResult::Full);
+        }
+
+        let len = room.min(data.len());
+        // SAFETY: `len` is bounded by both the source length and the space
+        // left behind the cursor, so both ranges stay inside their regions.
+        unsafe {
+            core::ptr::copy_nonoverlapping(
+                data.as_ptr(),
+                self.buf.add(self.cur),
+                len,
+            );
+        }
+        self.cur += len;
+
+        if len < data.len() {
+            Ok(FillResult::Partial(len))
+        } else {
+            Ok(FillResult::Done(len))
         }
+    }
+}
+
+impl Buffer for RawBuffer<'_> {
+    fn total(&self) -> usize {
+        self.tot
+    }
+
+    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
+        // Explicit path: forwards to the inherent method above.
+        RawBuffer::fill(self, data)
+    }
+}
+
+/// [`Buffer`] backed by a borrowed byte slice.
+pub struct ByteBuffer<'lt> {
+    buf: &'lt mut [u8],
+    cur: usize,
+}
 
-        unsafe {
-            core::ptr::copy_nonoverlapping(s.as_ptr(), self.buf, len);
-            self.buf = self.buf.add(len);
+impl<'lt> ByteBuffer<'lt> {
+    /// Wraps `buf` with the write cursor at its start.
+    pub fn new(buf: &'lt mut [u8]) -> Self {
+        Self { cur: 0, buf }
+    }
+
+    /// Bytes that can still be written.
+    pub fn available(&self) -> usize {
+        let capacity = self.buf.len();
+        capacity - self.cur
+    }
+}
+
+impl Buffer for ByteBuffer<'_> {
+    fn total(&self) -> usize {
+        self.buf.len()
+    }
+
+    /// Copies as much of `data` as fits behind the cursor and advances it.
+    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
+        let room = self.available();
+        if room == 0 {
+            return Ok(FillResult::Full);
+        }
+
+        let len = room.min(data.len());
+        let end = self.cur + len;
+        self.buf[self.cur..end].copy_from_slice(&data[..len]);
+        self.cur = end;
+
+        if len < data.len() {
+            Ok(FillResult::Partial(len))
+        } else {
+            Ok(FillResult::Done(len))
         }
-        self.rem -= len;
+    }
+}
 
-        Ok(())
+impl Write for RawBuffer<'_> {
+    /// Succeeds only if the whole string fit; a truncated or failed write is
+    /// reported as `fmt::Error`.
+    fn write_str(&mut self, s: &str) -> core::fmt::Result {
+        if let Ok(FillResult::Done(_)) = self.fill(s.as_bytes()) {
+            Ok(())
+        } else {
+            Err(core::fmt::Error)
+        }
     }
 }
 

+ 3 - 0
src/kernel.ld

@@ -116,6 +116,9 @@ SECTIONS
         *(.data)
         *(.data*)
 
+        *(.got)
+        *(.got.plt)
+
         . = ALIGN(0x1000);
         DATA_END = .;
         KIMAGE_END = .;

+ 283 - 38
src/kernel/block.rs

@@ -1,67 +1,312 @@
+use core::cmp::Ordering;
+
 use crate::{
-    bindings::root::{fs::block_device_read, EINVAL, EIO},
-    KResult,
+    io::{Buffer, FillResult, UninitBuffer},
+    prelude::*,
+};
+
+use alloc::{
+    collections::btree_map::{BTreeMap, Entry},
+    sync::Arc,
 };
+use bindings::{EEXIST, EINVAL, EIO, ENOENT};
 
-use super::vfs::DevId;
+use crate::KResult;
+
+use super::{
+    mem::{paging::Page, phys::PhysPtr},
+    vfs::DevId,
+};
 
 pub fn make_device(major: u32, minor: u32) -> DevId {
     (major << 8) & 0xff00u32 | minor & 0xffu32
 }
 
+pub trait BlockRequestQueue: Send + Sync { // Backend that executes requests for a disk and its partitions.
+    /// Upper bound on the size of a single request.
+    /// NOTE(review): the name says pages, but `BlockDevice::read_some` clamps a sector count with this value - confirm the intended unit.
+    fn max_request_pages(&self) -> u64;
+
+    fn submit(&mut self, req: BlockDeviceRequest) -> KResult<()>; // Carry out `req`. NOTE(review): presumably synchronous, since `read_some` reads the pages right after this returns - confirm.
+}
+
+struct BlockDeviceDisk { // Backing data of a whole-disk device.
+    queue: Arc<Mutex<dyn BlockRequestQueue>>, // Queue that requests are submitted to.
+}
+
+struct BlockDevicePartition { // Backing data of a partition device.
+    disk_dev: DevId, // Device id of the disk this partition was registered on.
+    offset: u64, // First sector of the partition on the disk; added to `req.sector` in `read_raw`.
+
+    queue: Arc<Mutex<dyn BlockRequestQueue>>, // Clone of the parent disk's queue handle.
+}
+
+enum BlockDeviceType { // Distinguishes whole disks from partitions.
+    Disk(BlockDeviceDisk),
+    Partition(BlockDevicePartition),
+}
+
 pub struct BlockDevice {
-    device: DevId,
+    devid: DevId, // Key under which the device is stored in `BLOCK_DEVICE_LIST`.
+    size: u64, // Length in sectors (compared against `sector + count` in `read_raw`).
+    max_pages: u64, // Value of `max_request_pages()` taken from the queue at registration.
+
+    dev_type: BlockDeviceType, // Disk- or partition-specific data.
+}
+
+// Block devices are identified and ordered solely by their device id.
+impl PartialEq for BlockDevice {
+    fn eq(&self, other: &Self) -> bool {
+        self.devid.eq(&other.devid)
+    }
 }
 
-// pub struct BlockDeviceRequest<'lt> {
-//     pub sector: u64, // Sector to read from, in 512-byte blocks
-//     pub count: u64,  // Number of sectors to read
-//     pub buffer: &'lt [Page],
-// }
+impl PartialOrd for BlockDevice {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        // Canonical form: defer to the total order below.
+        Some(self.cmp(other))
+    }
+}
 
-pub struct BlockDeviceRequest<'lt> {
-    pub sector: u64, // Sector to read from, in 512-byte blocks
-    pub count: u64,  // Number of sectors to read
-    pub buffer: &'lt mut [u8],
+impl Eq for BlockDevice {}
+
+impl Ord for BlockDevice {
+    fn cmp(&self, other: &Self) -> Ordering {
+        Ord::cmp(&self.devid, &other.devid)
+    }
+}
+
+static BLOCK_DEVICE_LIST: Mutex<BTreeMap<DevId, Arc<BlockDevice>>> =
+    Mutex::new(BTreeMap::new());
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+struct MBREntry { // One 16-byte slot of the partition table inside `MBR`.
+    attr: u8,
+    chs_start: [u8; 3],
+    part_type: u8, // 0 marks a slot that `partprobe` skips.
+    chs_end: [u8; 3],
+    lba_start: u32, // First sector of the partition; becomes the partition offset.
+    cnt: u32, // Number of sectors; becomes the partition size.
+}
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C, packed)]
+struct MBR { // Image of the first 512 bytes of a disk: 446 + 4 * 16 + 2.
+    code: [u8; 446],
+    entries: [MBREntry; 4], // Starts at byte 446, which is not 4-byte aligned; `partprobe` copies the array out before iterating.
+    magic: [u8; 2], // `partprobe` only parses the table when this equals [0x55, 0xaa].
 }
 
 impl BlockDevice {
-    pub fn new(device: DevId) -> Self {
-        BlockDevice { device }
+    /// Creates a whole-disk device of `size` sectors served by `queue` and
+    /// adds it to the global device list.
+    ///
+    /// Fails with `EEXIST` if `devid` is already registered.
+    pub fn register_disk(
+        devid: DevId,
+        size: u64,
+        queue: Arc<Mutex<dyn BlockRequestQueue>>,
+    ) -> KResult<Arc<Self>> {
+        // Query the limit before the queue handle is moved into the device.
+        let max_pages = queue.lock().max_request_pages();
+        let disk = BlockDeviceDisk { queue };
+        let device = Arc::new(Self {
+            devid,
+            size,
+            max_pages,
+            dev_type: BlockDeviceType::Disk(disk),
+        });
+
+        match BLOCK_DEVICE_LIST.lock().entry(devid) {
+            Entry::Occupied(_) => Err(EEXIST),
+            Entry::Vacant(slot) => Ok(Arc::clone(slot.insert(device))),
+        }
+    }
+
+    /// Looks up a registered device; `ENOENT` if `devid` is unknown.
+    pub fn get(devid: DevId) -> KResult<Arc<Self>> {
+        match BLOCK_DEVICE_LIST.lock().get(&devid) {
+            Some(device) => Ok(Arc::clone(device)),
+            None => Err(ENOENT),
+        }
     }
+}
 
+impl BlockDevice {
     pub fn devid(&self) -> DevId {
-        self.device
+        self.devid // Identifier this device was registered under.
     }
 
-    pub fn read(&self, req: &mut BlockDeviceRequest) -> KResult<()> {
-        // // Verify that the buffer is big enough
-        // let buffer_size = req.buffer.iter().fold(0, |acc, e| acc + e.len());
-        // if buffer_size / 512 < req.count as usize {
-        //     return Err(EINVAL);
-        // }
+    /// Registers partition `idx` of this disk as a block device of its own.
+    ///
+    /// `offset` and `size` are given in sectors relative to the disk. The new
+    /// device shares the disk's request queue and request size limit. Its
+    /// device id keeps the disk's major number and uses the disk's minor
+    /// number plus `idx` as minor.
+    ///
+    /// # Errors
+    /// - `EINVAL` if `self` is a partition itself
+    /// - `EEXIST` if the resulting device id is already registered
+    pub fn register_partition(
+        &self,
+        idx: u32,
+        offset: u64,
+        size: u64,
+    ) -> KResult<Arc<Self>> {
+        let queue = match self.dev_type {
+            BlockDeviceType::Disk(ref disk) => disk.queue.clone(),
+            BlockDeviceType::Partition(_) => return Err(EINVAL),
+        };
+
+        // Number partitions relative to the parent's minor. Using `idx` alone
+        // gives partition N of every disk with the same major the same id, so
+        // probing a second disk fails with EEXIST. For a disk with minor 0 the
+        // result is unchanged.
+        let minor = (self.devid & 0xff) + idx;
+
+        let device = Arc::new(BlockDevice {
+            devid: make_device(self.devid >> 8, minor),
+            size,
+            max_pages: self.max_pages,
+            dev_type: BlockDeviceType::Partition(BlockDevicePartition {
+                disk_dev: self.devid,
+                offset,
+                queue,
+            }),
+        });
+
+        match BLOCK_DEVICE_LIST.lock().entry(device.devid()) {
+            Entry::Vacant(entry) => Ok(entry.insert(device).clone()),
+            Entry::Occupied(_) => Err(EEXIST),
+        }
+    }
+
+    /// Reads the first sector of a disk and registers every used MBR slot as
+    /// partition `slot index + 1`.
+    ///
+    /// A sector without the `55 aa` signature is not an error: nothing is
+    /// registered and `Ok(())` is returned. Calling this on a partition fails
+    /// with `EINVAL`; a short read fails with `EIO`.
+    pub fn partprobe(&self) -> KResult<()> {
+        if let BlockDeviceType::Partition(_) = self.dev_type {
+            return Err(EINVAL);
+        }
+
+        let mut sector: UninitBuffer<MBR> = UninitBuffer::new();
+        self.read_some(0, &mut sector)?.ok_or(EIO)?;
+        let mbr = sector.assume_filled_ref()?;
+
+        if mbr.magic != [0x55, 0xaa] {
+            return Ok(());
+        }
+
+        // Copy the table out of the packed struct before borrowing entries.
+        let entries = mbr.entries;
 
-        // Verify that the buffer is big enough
-        if req.buffer.len() < req.count as usize * 512 {
+        let used = entries
+            .iter()
+            .enumerate()
+            .filter(|(_, entry)| entry.part_type != 0);
+
+        for (idx, entry) in used {
+            let first_sector = entry.lba_start as u64;
+            let sectors = entry.cnt as u64;
+
+            self.register_partition(idx as u32 + 1, first_sector, sectors)?;
+        }
+
+        Ok(())
+    }
+
+    /// No extra overhead, send the request directly to the queue
+    /// If any of the parameters does not meet the requirement, the operation will fail
+    ///
+    /// # Requirements
+    /// - `req.count` must not exceed the disk size and maximum request size
+    /// - `req.sector` must be within the disk size
+    /// - `req.buffer` must be enough to hold the data
+    ///
+    /// # Errors
+    /// `EINVAL` if `req.sector + req.count` overflows or exceeds the device
+    /// size, or if adding the partition offset overflows; otherwise whatever
+    /// the queue's `submit` returns.
+    ///
+    /// TODO: the maximum request size and the buffer length are not verified
+    /// here yet.
+    pub fn read_raw(&self, mut req: BlockDeviceRequest) -> KResult<()> {
+        // Checked arithmetic: a huge `sector` must be rejected instead of
+        // wrapping around the bound (or panicking when overflow checks are on).
+        let in_range = req
+            .sector
+            .checked_add(req.count)
+            .map_or(false, |end| end <= self.size);
+        if !in_range {
             return Err(EINVAL);
         }
 
-        let buffer = req.buffer.as_mut_ptr();
+        match self.dev_type {
+            BlockDeviceType::Disk(ref disk) => disk.queue.lock().submit(req),
+            BlockDeviceType::Partition(ref part) => {
+                // Translate the partition-relative sector to a disk sector.
+                req.sector =
+                    req.sector.checked_add(part.offset).ok_or(EINVAL)?;
+                part.queue.lock().submit(req)
+            }
+        }
+    }
 
-        let nread = unsafe {
-            block_device_read(
-                self.device as u32,
-                buffer as *mut _,
-                req.buffer.len(),
-                req.sector as usize * 512,
-                req.count as usize * 512,
-            )
-        };
+    /// Read some from the block device, may involve some copy and fragmentation
+    ///
+    /// Further optimization may be needed, including caching, read-ahead and reordering
+    ///
+    /// The data is read into temporary pages in chunks and copied from there
+    /// into `buffer`. Only bytes belonging to the sectors that were actually
+    /// requested are copied; the unused tail of a chunk's last page is skipped.
+    ///
+    /// # Arguments
+    /// `offset` - offset in bytes
+    ///
+    /// # Returns
+    /// `Done(n)` if `buffer` was filled completely, `Partial(n)` otherwise,
+    /// where `n` is the number of bytes stored
+    ///
+    /// # Errors
+    /// Errors of `read_raw` and `buffer.fill` are passed on; `EINVAL` if the
+    /// device's request limit is zero and the read needs more than 16 sectors
+    ///
+    pub fn read_some(
+        &self,
+        offset: usize,
+        buffer: &mut impl Buffer,
+    ) -> KResult<FillResult> {
+        let mut sector_start = offset as u64 / 512;
+        let mut first_sector_offset = offset as u64 % 512;
+        let mut sector_count =
+            (first_sector_offset + buffer.total() as u64 + 511) / 512;
 
-        match nread {
-            i if i < 0 => return Err(i as u32),
-            i if i as u64 == req.count * 512 => Ok(()),
-            _ => Err(EIO),
+        let mut nfilled = 0;
+        'outer: while sector_count != 0 {
+            // `page` / `page_vec` only keep the allocation alive for as long
+            // as `pages` borrows from it.
+            let pages: &[Page];
+            let page: Option<Page>;
+            let page_vec: Option<Vec<Page>>;
+
+            let nread;
+
+            // NOTE(review): the thresholds assume that `alloc_one()` holds 8
+            // sectors and `alloc_many(1)` holds 16 - confirm against `Page`.
+            match sector_count {
+                count if count <= 8 => {
+                    nread = count;
+
+                    let _page = Page::alloc_one();
+                    page = Some(_page);
+                    pages = core::slice::from_ref(page.as_ref().unwrap());
+                }
+                count if count <= 16 => {
+                    nread = count;
+
+                    let _pages = Page::alloc_many(1);
+                    page = Some(_pages);
+                    pages = core::slice::from_ref(page.as_ref().unwrap());
+                }
+                count => {
+                    nread = count.min(self.max_pages);
+                    if nread == 0 {
+                        // A zero limit would never make progress below.
+                        return Err(EINVAL);
+                    }
+
+                    let npages = (nread + 15) / 16;
+                    let mut _page_vec = Vec::with_capacity(npages as usize);
+                    for _ in 0..npages {
+                        _page_vec.push(Page::alloc_many(1));
+                    }
+                    page_vec = Some(_page_vec);
+                    pages = page_vec.as_ref().unwrap().as_slice();
+                }
+            }
+
+            let req = BlockDeviceRequest {
+                sector: sector_start,
+                count: nread,
+                buffer: &pages,
+            };
+
+            self.read_raw(req)?;
+
+            // Only `nread` sectors were requested. Whatever lies behind them
+            // in the last page was not part of the request and must not reach
+            // the caller's buffer.
+            let mut valid = nread as usize * 512;
+            for page in pages.iter() {
+                if valid == 0 {
+                    break;
+                }
+
+                let bytes: &[u8] = page.as_cached().as_slice(page.len());
+                let used = bytes.len().min(valid);
+                valid -= used;
+
+                let mut data = &bytes[..used];
+                if first_sector_offset != 0 {
+                    // Skip the bytes in front of `offset`; this applies to the
+                    // very first page of the whole read only.
+                    data = &data[first_sector_offset as usize..];
+                    first_sector_offset = 0;
+                }
+
+                match buffer.fill(data)? {
+                    FillResult::Done(n) => nfilled += n,
+                    FillResult::Partial(n) => {
+                        nfilled += n;
+                        break 'outer;
+                    }
+                    FillResult::Full => {
+                        break 'outer;
+                    }
+                }
+            }
+
+            sector_start += nread;
+            sector_count -= nread;
+        }
+
+        if nfilled == buffer.total() {
+            Ok(FillResult::Done(nfilled))
+        } else {
+            Ok(FillResult::Partial(nfilled))
         }
     }
 }
+
+pub struct BlockDeviceRequest<'lt> { // A single read handed to a `BlockRequestQueue`.
+    pub sector: u64, // Sector to read from, in 512-byte blocks
+    pub count: u64,  // Number of sectors to read
+    pub buffer: &'lt [Page], // Pages the data is read into, in order. NOTE(review): presumably must cover `count * 512` bytes - confirm against the queue implementation.
+}

+ 0 - 515
src/kernel/hw/ahci.cc

@@ -1,515 +0,0 @@
-#include <algorithm>
-#include <cstddef>
-#include <vector>
-
-#include <assert.h>
-#include <errno.h>
-#include <stdint.h>
-
-#include <kernel/hw/pci.hpp>
-#include <kernel/irq.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/mem/phys.hpp>
-#include <kernel/module.hpp>
-#include <kernel/vfs.hpp>
-
-#define SPIN(cond, spin)                 \
-    (spin) = 0;                          \
-    while ((cond) && (spin) < MAX_SPINS) \
-        ++(spin);                        \
-    if ((spin) == MAX_SPINS)
-
-using namespace kernel::kmod;
-using namespace kernel::hw::pci;
-using namespace kernel::mem::paging;
-
-using kernel::mem::physaddr;
-
-constexpr uint32_t MAX_SPINS = 100000;
-
-constexpr uint16_t VENDOR_INTEL = 0x8086;
-constexpr uint16_t DEVICE_AHCI = 0x2922;
-
-constexpr uint32_t PCI_REG_ABAR = 0x09;
-
-constexpr uint32_t ATA_DEV_BSY = 0x08;
-constexpr uint32_t ATA_DEV_DRQ = 0x04;
-
-constexpr uint32_t PORT_CMD_ST = 0x00000001;
-constexpr uint32_t PORT_CMD_FRE = 0x00000010;
-constexpr uint32_t PORT_CMD_FR = 0x00004000;
-constexpr uint32_t PORT_CMD_CR = 0x00008000;
-
-namespace ahci {
-
-typedef volatile struct hba_port_t {
-    uint64_t command_list_base;
-    uint64_t fis_base;
-
-    uint32_t interrupt_status;
-    uint32_t interrupt_enable;
-
-    uint32_t command_status;
-
-    uint32_t : 32; // reserved
-
-    uint32_t task_file_data;
-    uint32_t signature;
-
-    uint32_t sata_status;
-    uint32_t sata_control;
-    uint32_t sata_error;
-    uint32_t sata_active;
-
-    uint32_t command_issue;
-    uint32_t sata_notification;
-
-    uint32_t fis_based_switch_control;
-
-    uint32_t reserved[11];
-    uint32_t vendor[4];
-} hba_port;
-
-typedef volatile struct hba_ghc_t {
-    uint32_t capabilities;
-    uint32_t global_host_control;
-    uint32_t interrupt_status;
-    uint32_t ports_implemented;
-    uint32_t version;
-    uint32_t command_completion_coalescing_control;
-    uint32_t command_completion_coalescing_ports;
-    uint32_t enclosure_management_location;
-    uint32_t enclosure_management_control;
-    uint32_t host_capabilities_extended;
-    uint32_t bios_handoff_control_status;
-    uint8_t reserved[0xa0 - 0x2c];
-    uint8_t vendor[0x100 - 0xa0];
-} hba_ghc;
-
-struct command_header {
-    uint8_t command_fis_length : 5;
-    uint8_t atapi : 1;
-    uint8_t write : 1;
-    uint8_t prefetchable : 1;
-
-    uint8_t reset : 1;
-    uint8_t bist : 1;
-    uint8_t volatile clear_busy_upon_ok : 1;
-    uint8_t reserved0 : 1;
-    uint8_t port_multiplier : 4;
-
-    uint16_t prdt_length;
-
-    uint32_t volatile bytes_transferred;
-
-    uint64_t command_table_base;
-
-    uint32_t reserved1[4];
-};
-
-enum fis_type {
-    FIS_REG_H2D = 0x27,
-    FIS_REG_D2H = 0x34,
-    FIS_DMA_ACT = 0x39,
-    FIS_DMA_SETUP = 0x41,
-    FIS_DATA = 0x46,
-    FIS_BIST = 0x58,
-    FIS_PIO_SETUP = 0x5f,
-    FIS_DEV_BITS = 0xa1,
-};
-
-struct fis_reg_h2d {
-    uint8_t fis_type;
-
-    uint8_t pm_port : 4;
-    uint8_t : 3; // reserved
-    uint8_t is_command : 1;
-
-    uint8_t command;
-    uint8_t feature;
-
-    uint8_t lba0;
-    uint8_t lba1;
-    uint8_t lba2;
-    uint8_t device;
-
-    uint8_t lba3;
-    uint8_t lba4;
-    uint8_t lba5;
-    uint8_t feature_high;
-
-    uint16_t count;
-    uint8_t iso_command_completion;
-    uint8_t control_register;
-
-    uint8_t reserved[4];
-};
-
-struct fis_reg_d2h {
-    uint8_t fis_type;
-
-    uint8_t pm_port : 4;
-    uint8_t : 2; // reserved
-    uint8_t interrupt : 1;
-    uint8_t : 1; // reserved
-
-    uint8_t status;
-    uint8_t error;
-
-    uint8_t lba0;
-    uint8_t lba1;
-    uint8_t lba2;
-    uint8_t device;
-
-    uint8_t lba3;
-    uint8_t lba4;
-    uint8_t lba5;
-    uint8_t : 8; // reserved
-
-    uint16_t count;
-    uint8_t reserved1[2];
-
-    uint8_t reserved2[4];
-};
-
-struct fis_pio_setup {
-    uint8_t fis_type;
-
-    uint8_t pm_port : 4;
-    uint8_t : 1;                         // reserved
-    uint8_t data_transfer_direction : 1; // device to host if set
-    uint8_t interrupt : 1;
-    uint8_t : 1; // reserved
-
-    uint8_t status;
-    uint8_t error;
-
-    uint8_t lba0;
-    uint8_t lba1;
-    uint8_t lba2;
-    uint8_t device;
-
-    uint8_t lba3;
-    uint8_t lba4;
-    uint8_t lba5;
-    uint8_t : 8; // reserved
-
-    uint16_t count;
-    uint8_t reserved1;
-    uint8_t new_status;
-
-    uint16_t transfer_count;
-    uint8_t reserved2[2];
-};
-
-struct received_fis {
-    uint8_t fis_dma_setup[32]; // we don't care about it for now
-
-    fis_pio_setup fispio;
-    uint8_t padding[12];
-
-    fis_reg_d2h fisreg;
-    uint8_t padding2[4];
-
-    uint8_t fissdb[8];
-
-    uint8_t ufis[64];
-
-    uint8_t reserved[96];
-};
-
-struct prdt_entry {
-    uint64_t data_base;
-
-    uint32_t reserved0;
-
-    uint32_t byte_count : 22;
-    uint32_t reserved1 : 9;
-    uint32_t interrupt : 1;
-};
-
-struct command_table {
-    fis_reg_h2d command_fis;
-
-    uint8_t reserved1[44];
-
-    uint8_t atapi_command[16];
-
-    uint8_t reserved2[48];
-
-    prdt_entry prdt[];
-};
-
-static int stop_command(hba_port* port) {
-    port->command_status = port->command_status & ~(PORT_CMD_ST | PORT_CMD_FRE);
-
-    uint32_t spins = 0;
-    SPIN(port->command_status & (PORT_CMD_CR | PORT_CMD_FR), spins)
-    return -1;
-
-    return 0;
-}
-
-static int start_command(hba_port* port) {
-    uint32_t spins = 0;
-    SPIN(port->command_status & PORT_CMD_CR, spins)
-    return -1;
-
-    port->command_status = port->command_status | PORT_CMD_FRE;
-    port->command_status = port->command_status | PORT_CMD_ST;
-
-    return 0;
-}
-
-static inline hba_port* port_ptr(hba_ghc* ghc, int i) {
-    return (hba_port*)((char*)ghc + 0x100 + i * 0x80);
-}
-
-template <std::size_t N>
-struct quick_queue {
-    std::size_t start{};
-    std::size_t end{};
-    uint8_t arr[N]{};
-
-    quick_queue() {
-        for (std::size_t i = 0; i < N; ++i)
-            arr[i] = i;
-    }
-
-    bool empty() { return start == end; }
-    void push(uint8_t val) { arr[end++ % N] = val; }
-    uint8_t pop() { return arr[start++ % N]; }
-};
-
-struct ahci_port {
-   private:
-    // quick_queue<32> qu;
-    physaddr<command_header, false> cmd_header;
-    hba_port* port;
-    received_fis* fis{};
-    std::size_t sectors{-1U};
-
-    int send_command(physaddr<void> buf, uint64_t lba, uint32_t count,
-                     uint8_t cmd, bool write) {
-        // count must be a multiple of 512
-        if (count & (512 - 1))
-            return -1;
-
-        // TODO: get an availablee command slot
-        int n = 0;
-        // auto n = qu.pop();
-
-        // command fis and prdt will take up the lower 128+Bytes
-        // TODO: buffer array
-        pfn_t command_table_pfn = page_to_pfn(alloc_page());
-        physaddr<command_table, false> cmdtable{command_table_pfn};
-
-        // construct command header
-        memset(cmd_header + n, 0x00, sizeof(command_header));
-        cmd_header[n].command_fis_length = 5;
-        cmd_header[n].clear_busy_upon_ok = 1;
-
-        cmd_header[n].write = write;
-        cmd_header[n].prdt_length = 1;
-        cmd_header[n].command_table_base = cmdtable.phys();
-
-        memset(cmdtable, 0x00, sizeof(command_table) + sizeof(prdt_entry));
-
-        // first, set up command fis
-        cmdtable->command_fis.fis_type = FIS_REG_H2D;
-        cmdtable->command_fis.is_command = 1;
-        cmdtable->command_fis.command = cmd;
-
-        cmdtable->command_fis.lba0 = lba & 0xff;
-        cmdtable->command_fis.lba1 = (lba >> 8) & 0xff;
-        cmdtable->command_fis.lba2 = (lba >> 16) & 0xff;
-        cmdtable->command_fis.device = 1 << 6; // lba mode
-        cmdtable->command_fis.lba3 = (lba >> 24) & 0xff;
-        cmdtable->command_fis.lba4 = (lba >> 32) & 0xff;
-        cmdtable->command_fis.lba5 = (lba >> 40) & 0xff;
-
-        cmdtable->command_fis.count = count >> 9;
-
-        // fill in prdt
-        auto* pprdt = cmdtable->prdt;
-        pprdt->data_base = buf.phys();
-        pprdt->byte_count = count;
-        pprdt->interrupt = 1;
-
-        // clear the received fis
-        memset(fis, 0x00, sizeof(received_fis));
-
-        // wait until port is not busy
-        uint32_t spins = 0;
-        SPIN(port->task_file_data & (ATA_DEV_BSY | ATA_DEV_DRQ), spins)
-        return -1;
-
-        // TODO: use interrupt
-        // issue the command
-        port->command_issue = 1 << n;
-
-        SPIN(port->command_issue & (1 << n), spins)
-        return -1;
-
-        free_page(command_table_pfn);
-        return 0;
-    }
-
-    int identify() {
-        pfn_t buffer_page = page_to_pfn(alloc_page());
-        int ret =
-            send_command(physaddr<void>{buffer_page}, 0, 512, 0xEC, false);
-
-        free_page(buffer_page);
-        if (ret != 0)
-            return -1;
-        return 0;
-    }
-
-   public:
-    explicit ahci_port(hba_port* port)
-        : cmd_header{page_to_pfn(alloc_page())}, port(port) {}
-
-    ~ahci_port() {
-        if (!cmd_header)
-            return;
-        free_page(cmd_header.phys());
-    }
-
-    ssize_t read(char* buf, std::size_t buf_size, std::size_t offset,
-                 std::size_t cnt) {
-        cnt = std::min(buf_size, cnt);
-
-        pfn_t buffer_page = page_to_pfn(alloc_page());
-        physaddr<void> buffer_ptr{buffer_page};
-
-        char* orig_buf = buf;
-        size_t start = offset / 512;
-        size_t end = std::min((offset + cnt + 511) / 512, sectors);
-
-        offset -= start * 512;
-        for (size_t i = start; i < end; i += 4096UL / 512) {
-            size_t n_read = std::min(end - i, 4096UL / 512) * 512;
-            int status = send_command(buffer_ptr, i, n_read, 0xC8, false);
-            if (status != 0) {
-                free_page(buffer_page);
-                return -EIO;
-            }
-
-            size_t to_copy = std::min(cnt, n_read - offset);
-            memcpy(buf, (std::byte*)(void*)buffer_ptr + offset, to_copy);
-            offset = 0;
-            buf += to_copy;
-            cnt -= to_copy;
-        }
-
-        free_page(buffer_page);
-        return buf - orig_buf;
-    }
-
-    int init() {
-        if (stop_command(port) != 0)
-            return -1;
-
-        // TODO: use interrupt
-        // this is the PxIE register, setting bits here will make
-        //      it generate corresponding interrupts in PxIS
-        //
-        // port->interrupt_enable = 1;
-
-        port->command_list_base = cmd_header.phys();
-        port->fis_base = cmd_header.phys() + 0x400;
-
-        fis = (received_fis*)(cmd_header + 1);
-
-        if (start_command(port) != 0)
-            return -1;
-
-        if (identify() != 0)
-            return -1;
-
-        return 0;
-    }
-};
-
-class ahci_module : public virtual kmod {
-   private:
-    hba_ghc* ghc{};
-    pci_device* dev{};
-    std::vector<ahci_port*> ports;
-
-   public:
-    ahci_module() : kmod("ahci") {}
-    ~ahci_module() {
-        // TODO: release PCI device
-        for (auto& item : ports) {
-            if (!item)
-                continue;
-
-            delete item;
-            item = nullptr;
-        }
-    }
-
-    int probe_disks() {
-        int ports = this->ghc->ports_implemented;
-        for (int n = 0; ports; ports >>= 1, ++n) {
-            if (!(ports & 1))
-                continue;
-
-            auto* ghc_port = port_ptr(this->ghc, n);
-            if ((ghc_port->sata_status & 0x0f) != 0x03)
-                continue;
-
-            auto* port = new ahci_port(ghc_port);
-            if (port->init() != 0) {
-                delete port;
-                kmsg("An error occurred while configuring an ahci port");
-                continue;
-            }
-
-            this->ports[n] = port;
-
-            fs::register_block_device(
-                fs::make_device(8, n * 8),
-                {[port](char* buf, std::size_t buf_size, std::size_t offset,
-                        std::size_t cnt) {
-                     return port->read(buf, buf_size, offset, cnt);
-                 },
-                 nullptr});
-
-            fs::partprobe();
-        }
-
-        return 0;
-    }
-
-    virtual int init() override {
-        ports.resize(32);
-
-        auto ret = kernel::hw::pci::register_driver(
-            VENDOR_INTEL, DEVICE_AHCI, [this](pci_device& dev) -> int {
-                this->dev = &dev;
-
-                auto pointerBase = dev.header_type0().bars[5];
-                assert((pointerBase & 0xf) == 0);
-
-                physaddr<hba_ghc, false> pp_base{pointerBase & ~0xf};
-                this->ghc = pp_base;
-
-                this->ghc->global_host_control =
-                    this->ghc->global_host_control | 2; // set interrupt enable
-
-                return this->probe_disks();
-            });
-
-        if (ret != 0)
-            return ret;
-        return 0;
-    }
-};
-
-} // namespace ahci
-
-INTERNAL_MODULE(ahci, ahci::ahci_module);

+ 4 - 2
src/kernel/hw/pci.cc

@@ -69,8 +69,10 @@ int register_driver(uint16_t vendor, uint16_t device, driver_t drv) {
     auto deviter = s_pci_devices.find(dev);
 
     // TODO: check status or print log
-    if (deviter != s_pci_devices.end())
-        drv(*deviter->second);
+    if (deviter != s_pci_devices.end()) {
+        int ret = drv(*deviter->second);
+        assert(ret == 0);
+    }
 
     return 0;
 }

+ 8 - 2
src/kernel/hw/serial.cc

@@ -75,8 +75,14 @@ class serial_tty : public virtual tty {
     }
 
     virtual void putchar(char c) override {
-        while (!(*ports[5] & 0x20))
-            ; // nop
+        while (true) { // poll ports[5] until bit 5 (0x20) is set; c is written to ports[0] after the loop
+            auto status = *ports[5]; // one read per pass, so both bit tests below see the same value
+            if (status & 0x1) // bit 0 set: presumably "receive data ready" on a 16550-style UART (TODO confirm)
+                this->commit_char(*ports[0]); // hand the value read from ports[0] to commit_char
+            if (status & 0x20) // bit 5 set: presumably "transmit holding register empty" (TODO confirm)
+                break;
+        }
+
         ports[0] = c;
     }
 };

+ 0 - 1
src/kernel/mem/slab.cc

@@ -101,7 +101,6 @@ void kernel::mem::slab_free(void* ptr) {
         if (cache->slabs_full == slab) {
             head = &cache->slabs_full;
         } else {
-            assert(cache->slabs_partial == slab);
             head = &cache->slabs_partial;
         }
 

+ 0 - 93
src/kernel/vfs.cpp

@@ -155,7 +155,6 @@ fs::fifo_file::~fifo_file() {
         ppipe->close_write();
 }
 
-static fs::blkdev_ops** blkdevs[256];
 static fs::chrdev_ops** chrdevs[256];
 
 std::pair<fs::dentry_pointer, int> fs::open(const fs_context& context,
@@ -220,20 +219,6 @@ std::pair<fs::dentry_pointer, int> fs::open(const fs_context& context,
     return {std::move(cwd), 0};
 }
 
-int fs::register_block_device(dev_t node, const fs::blkdev_ops& ops) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!blkdevs[major])
-        blkdevs[major] = new blkdev_ops* [256] {};
-
-    if (blkdevs[major][minor])
-        return -EEXIST;
-
-    blkdevs[major][minor] = new blkdev_ops{ops};
-    return 0;
-}
-
 int fs::register_char_device(dev_t node, const fs::chrdev_ops& ops) {
     int major = NODE_MAJOR(node);
     int minor = NODE_MINOR(node);
@@ -267,84 +252,6 @@ struct PACKED mbr {
     uint16_t magic;
 };
 
-// TODO: devtmpfs
-static int mbr_part_probe(dev_t node) {
-    mbr buf_mbr;
-
-    int ret = fs::block_device_read(node, (char*)&buf_mbr, sizeof(mbr), 0, 512);
-    if (ret < 0)
-        return -EIO;
-
-    int n = 1;
-    for (const auto& part : buf_mbr.parts) {
-        if (n >= 16)
-            break;
-
-        if (!part.type)
-            continue;
-
-        std::size_t part_offset = part.lba_start * 512;
-
-        // TODO: add partition offset limit
-        fs::register_block_device(
-            node + n,
-            {[=](char* buf, size_t buf_size, size_t offset,
-                 size_t n) -> ssize_t {
-                 offset += part_offset;
-                 return fs::block_device_read(node, buf, buf_size, offset, n);
-             },
-             [=](const char* buf, size_t offset, size_t n) -> ssize_t {
-                 offset += part_offset;
-                 return fs::block_device_write(node, buf, offset, n);
-             }});
-
-        ++n;
-    }
-
-    return 0;
-}
-
-void fs::partprobe() {
-    for (int i = 0; i < 256; i += 16) {
-        int ret = mbr_part_probe(make_device(8, i));
-
-        if (ret != 0)
-            continue;
-
-        kmsgf("[info] found disk drive sd%c\n", 'a' + (i / 16));
-    }
-}
-
-ssize_t fs::block_device_read(dev_t node, char* buf, size_t buf_size,
-                              size_t offset, size_t n) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!blkdevs[major] || !blkdevs[major][minor])
-        return -EINVAL;
-
-    auto& read = blkdevs[major][minor]->read;
-    if (!read)
-        return -EINVAL;
-
-    return read(buf, buf_size, offset, n);
-}
-
-ssize_t fs::block_device_write(dev_t node, const char* buf, size_t offset,
-                               size_t n) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!blkdevs[major] || !blkdevs[major][minor])
-        return -EINVAL;
-
-    auto& write = blkdevs[major][minor]->write;
-    if (!write)
-        return -EINVAL;
-
-    return write(buf, offset, n);
-}
-
 ssize_t fs::char_device_read(dev_t node, char* buf, size_t buf_size, size_t n) {
     int major = NODE_MAJOR(node);
     int minor = NODE_MINOR(node);

+ 2 - 2
src/kernel/vfs/dentry.rs

@@ -92,8 +92,8 @@ impl Dentry {
         unsafe { Arc::from_raw(self.inode) }
     }
 
-    pub fn take_fs(&self) -> Arc<Mutex<dyn Vfs>> {
-        unsafe { Arc::from_raw(self.fs) }
+    pub fn take_fs(&self) -> Weak<Mutex<dyn Vfs>> {
+        unsafe { Weak::from_raw(self.fs) } // NOTE(review): Weak::from_raw needs a pointer produced by Weak::into_raw and reclaims one weak reference per call; confirm self.fs satisfies this and that take_fs is not called twice for the same reference
     }
 }
 

+ 10 - 39
src/kernel/vfs/ffi.rs

@@ -1,4 +1,4 @@
-use crate::prelude::*;
+use crate::{io::ByteBuffer, kernel::block::BlockDevice, prelude::*};
 
 use core::ffi::{c_char, c_void};
 
@@ -56,32 +56,18 @@ pub extern "C" fn fs_mount(
 fn do_read(
     file: Arc<dyn Inode>,
     buffer: &mut [u8],
-    count: usize,
     offset: usize,
 ) -> KResult<usize> {
     let mode = { file.idata().lock().mode };
 
     match mode {
         mode if s_isdir(mode) => Err(EISDIR),
-        mode if s_isreg(mode) => file.read(buffer, count, offset),
+        mode if s_isreg(mode) => file.read(buffer, offset),
         mode if s_isblk(mode) => {
-            let devid = file.devid()?;
-
-            let ret = unsafe {
-                fs::block_device_read(
-                    devid,
-                    buffer.as_mut_ptr() as *mut _,
-                    buffer.len(),
-                    offset,
-                    count,
-                )
-            };
+            let device = BlockDevice::get(file.devid()?)?;
+            let mut buffer = ByteBuffer::new(buffer);
 
-            if ret < 0 {
-                Err(-ret as u32)
-            } else {
-                Ok(ret as usize)
-            }
+            Ok(device.read_some(offset, &mut buffer)?.allow_partial())
         }
         mode if s_ischr(mode) => {
             let devid = file.devid()?;
@@ -91,7 +77,7 @@ fn do_read(
                     devid,
                     buffer.as_mut_ptr() as *mut _,
                     buffer.len(),
-                    count,
+                    buffer.len(),
                 )
             };
 
@@ -115,24 +101,7 @@ fn do_write(
     match mode {
         mode if s_isdir(mode) => Err(EISDIR),
         mode if s_isreg(mode) => file.write(buffer, offset),
-        mode if s_isblk(mode) => {
-            let devid = file.devid()?;
-
-            let ret = unsafe {
-                fs::block_device_write(
-                    devid,
-                    buffer.as_ptr() as *const _,
-                    offset,
-                    buffer.len(),
-                )
-            };
-
-            if ret < 0 {
-                Err(-ret as u32)
-            } else {
-                Ok(ret as usize)
-            }
-        }
+        mode if s_isblk(mode) => Err(EINVAL), // TODO
         mode if s_ischr(mode) => {
             let devid = file.devid()?;
 
@@ -163,9 +132,11 @@ pub extern "C" fn fs_read(
     n: usize,
 ) -> isize {
     let file = raw_inode_clone(file);
+
+    let bufsize = bufsize.min(n);
     let buffer = into_mut_slice(buf, &bufsize);
 
-    match do_read(file, buffer, n, offset) {
+    match do_read(file, buffer, offset) {
         Ok(n) => n as isize,
         Err(e) => -(e as isize),
     }

+ 1 - 6
src/kernel/vfs/inode.rs

@@ -170,12 +170,7 @@ pub trait Inode {
         Err(EINVAL)
     }
 
-    fn read(
-        &self,
-        buffer: &mut [u8],
-        count: usize,
-        offset: usize,
-    ) -> KResult<usize> {
+    fn read(&self, buffer: &mut [u8], offset: usize) -> KResult<usize> { // provided default: always returns Err(EINVAL)
         Err(EINVAL)
     }
 

+ 1 - 0
src/kernel/vfs/mod.rs

@@ -38,6 +38,7 @@ pub fn s_isblk(mode: Mode) -> bool {
     (mode & S_IFMT) == S_IFBLK
 }
 
+#[derive(Clone, Copy, Default)]
 #[repr(C)]
 pub struct TimeSpec {
     pub sec: u64,

+ 1 - 0
src/lib.rs

@@ -59,6 +59,7 @@ static ALLOCATOR: Allocator = Allocator {};
 #[no_mangle]
 pub extern "C" fn late_init_rust() {
     driver::e1000e::register_e1000e_driver();
+    driver::ahci::register_ahci_driver();
 
     fs::tmpfs::init();
     fs::procfs::init();