Преглед изворни кода

vfs: rework the vfs subsystem with async

Inode and superblock rework:

Remove the old Inode trait, as it took on too much responsibility.
The new design uses three new traits: InodeOps is used to acquire
generic inode attributes, while InodeFileOps and InodeDirOps handle file
and directory requests respectively. All three have async fn trait
methods and don't need to be ?Sized. Then, we implement Inode, InodeFile
and InodeDir for the implementors of the three "Ops" traits, erasing
their actual types and providing a generic dyn interface by wrapping the
futures in boxes. Should we provide an I/O worker, or some I/O context
with an allocator for futures, to reduce the overhead of I/O requests?
Or can we come up with a better idea?

For inode usage, we introduce InodeRef and InodeUse. InodeRef is a
simple wrapper for Weak<impl Inode>, and InodeUse for Arc<impl Inode>.
This makes them easier to use, as we can't define impls for
Arc<dyn Inode> directly since it is a foreign type. We also provide some
more helper methods for them.

After the change, we don't impose ANY structural restriction except for
the spinlock-wrapped InodeInfo. The InodeInfo struct design might need
rethinking, but the current implementation seems to be fine aside from
unnecessary locking when we access some of its fields. This shouldn't be
a VERY big problem, nor a very urgent one...

Similar changes are also made to the superblock traits and types. For
the superblock objects, however, we use a SuperBlockComplex struct to
store common fields such as whether the superblock is read-only, its
device id and so on. The struct also has a superblock rwsem inside, but
we haven't decided how to use it (such as whether we should acquire the
lock and pass it to the inode methods), or even whether it should exist
at all. This will need further thought, so we put it off for now...

Filesystem rework:

Rework tmpfs, fatfs and procfs with the new design described above,
leaving the old ext4 unchanged. The current implementation of ext4 uses
some "random" library from the "camp". Its code hasn't been fully
reviewed due to time constraints, but it seems to be rather
"problematic"... We might rewrite the whole module later, and the page
cache subsystem requires a full rework as well, so we put this work off
too.

Block device and other parts rework:

Wrap PageCacheBackend, MountCreator and BlockRequestQueue with
async_trait to provide dyn-compatible async functions. Dentry walking
functions are also put on the heap, since they are recursive
functions... This has similar problems to the inode traits: it is an
ugly solution, and further optimization is required.

Signed-off-by: greatbridf <greatbridf@icloud.com>
greatbridf пре 5 месеци
родитељ
комит
19f9ef3351
53 измењених фајлова са 2986 додато и 2529 уклоњено
  1. 1 1
      .vscode/tasks.json
  2. 98 0
      Cargo.lock
  3. 2 0
      Cargo.toml
  4. 35 28
      src/driver/ahci/mod.rs
  5. 28 20
      src/driver/ahci/port.rs
  6. 10 9
      src/driver/ahci/slot.rs
  7. 5 3
      src/driver/e1000e.rs
  8. 3 3
      src/driver/serial.rs
  9. 2 5
      src/driver/virtio/riscv64.rs
  10. 4 1
      src/driver/virtio/virtio_blk.rs
  11. 18 17
      src/fs/ext4.rs
  12. 337 230
      src/fs/fat32.rs
  13. 98 116
      src/fs/fat32/dir.rs
  14. 15 31
      src/fs/fat32/file.rs
  15. 1 2
      src/fs/mod.rs
  16. 188 249
      src/fs/procfs.rs
  17. 0 146
      src/fs/shm.rs
  18. 0 613
      src/fs/tmpfs.rs
  19. 415 0
      src/fs/tmpfs/dir.rs
  20. 298 0
      src/fs/tmpfs/file.rs
  21. 73 0
      src/fs/tmpfs/mod.rs
  22. 18 20
      src/kernel/block.rs
  23. 1 1
      src/kernel/block/mbr.rs
  24. 7 8
      src/kernel/chardev.rs
  25. 1 1
      src/kernel/mem.rs
  26. 3 12
      src/kernel/mem/mm_list/mapping.rs
  27. 47 8
      src/kernel/mem/page_cache.rs
  28. 6 3
      src/kernel/pcie/driver.rs
  29. 56 38
      src/kernel/syscall/file_rw.rs
  30. 2 120
      src/kernel/syscall/mm.rs
  31. 12 8
      src/kernel/syscall/procops.rs
  32. 19 15
      src/kernel/task/loader/elf.rs
  33. 8 5
      src/kernel/task/loader/mod.rs
  34. 2 0
      src/kernel/timer.rs
  35. 150 145
      src/kernel/vfs/dentry.rs
  36. 7 22
      src/kernel/vfs/dentry/dcache.rs
  37. 67 72
      src/kernel/vfs/file/inode_file.rs
  38. 14 9
      src/kernel/vfs/filearray.rs
  39. 0 494
      src/kernel/vfs/inode.rs
  40. 31 0
      src/kernel/vfs/inode/ino.rs
  41. 389 0
      src/kernel/vfs/inode/inode.rs
  42. 10 0
      src/kernel/vfs/inode/mod.rs
  43. 18 0
      src/kernel/vfs/inode/ops.rs
  44. 97 0
      src/kernel/vfs/inode/statx.rs
  45. 11 11
      src/kernel/vfs/mod.rs
  46. 20 12
      src/kernel/vfs/mount.rs
  47. 127 0
      src/kernel/vfs/superblock.rs
  48. 36 0
      src/kernel/vfs/types/device_id.rs
  49. 5 0
      src/kernel/vfs/types/mod.rs
  50. 169 0
      src/kernel/vfs/types/mode.rs
  51. 0 10
      src/kernel/vfs/vfs.rs
  52. 21 12
      src/lib.rs
  53. 1 29
      src/prelude.rs

+ 1 - 1
.vscode/tasks.json

@@ -6,7 +6,7 @@
         {
             "label": "debug run riscv64",
             "type": "shell",
-            "command": "make srun ARCH=riscv64",
+            "command": "make srun ARCH=riscv64 IMG=/Volumes/oscomp/sdcard-rv.img",
             "isBackground": true,
             "problemMatcher": [
                 {

+ 98 - 0
Cargo.lock

@@ -28,6 +28,17 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "atomic_unique_refcell"
 version = "0.1.0"
@@ -144,6 +155,7 @@ dependencies = [
  "acpi",
  "align_ext",
  "another_ext4",
+ "async-trait",
  "atomic_unique_refcell",
  "bitflags",
  "buddy_allocator",
@@ -155,6 +167,7 @@ dependencies = [
  "eonix_preempt",
  "eonix_runtime",
  "eonix_sync",
+ "futures",
  "intrusive-collections 0.9.8",
  "intrusive_list",
  "itertools",
@@ -265,6 +278,79 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "784a4df722dc6267a04af36895398f59d21d07dce47232adf31ec0ff2fa45e67"
 
+[[package]]
+name = "futures"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "futures-util",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
+
+[[package]]
+name = "futures-io"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
+
+[[package]]
+name = "futures-macro"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "futures-sink"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
+
+[[package]]
+name = "futures-task"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+
+[[package]]
+name = "futures-util"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
+dependencies = [
+ "futures-core",
+ "futures-macro",
+ "futures-sink",
+ "futures-task",
+ "pin-project-lite",
+ "pin-utils",
+]
+
 [[package]]
 name = "gimli"
 version = "0.32.0"
@@ -332,6 +418,18 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
+[[package]]
+name = "pin-project-lite"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
 [[package]]
 name = "pointers"
 version = "0.1.0"

+ 2 - 0
Cargo.toml

@@ -34,6 +34,8 @@ another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch
 stalloc = { version = "0.6.1", default-features = false, features = [
     "allocator-api",
 ] }
+async-trait = "0.1.89"
+futures = { version = "0.3.31", features = ["alloc", "async-await"], default-features = false }
 
 [target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies]
 virtio-drivers = { version = "0.11.0" }

+ 35 - 28
src/driver/ahci/mod.rs

@@ -2,15 +2,16 @@ use crate::{
     fs::procfs,
     io::Buffer as _,
     kernel::{
-        block::{make_device, BlockDevice},
+        block::BlockDevice,
         constants::{EINVAL, EIO},
         interrupt::register_irq_handler,
         pcie::{self, Header, PCIDevice, PCIDriver, PciError},
-        task::block_on,
+        vfs::types::DeviceId,
     },
     prelude::*,
 };
 use alloc::{format, sync::Arc};
+use async_trait::async_trait;
 use control::AdapterControl;
 use defs::*;
 use eonix_mm::address::{AddrOps as _, PAddr};
@@ -108,7 +109,30 @@ impl Device<'_> {
 }
 
 impl Device<'static> {
-    fn probe_ports(&self) -> KResult<()> {
+    async fn probe_port(&self, port: Arc<AdapterPort<'static>>) -> KResult<()> {
+        port.init().await?;
+
+        {
+            let port = port.clone();
+            let name = format!("ahci-p{}-stats", port.nport);
+            procfs::populate_root(name.into_bytes().into(), move |buffer| {
+                port.print_stats(&mut buffer.get_writer())
+            })
+            .await;
+        }
+
+        let port = BlockDevice::register_disk(
+            DeviceId::new(8, port.nport as u16 * 16),
+            2147483647, // TODO: get size from device
+            port,
+        )?;
+
+        port.partprobe().await?;
+
+        Ok(())
+    }
+
+    async fn probe_ports(&self) -> KResult<()> {
         for nport in self.control.implemented_ports() {
             let port = Arc::new(AdapterPort::new(self.control_base, nport));
             if !port.status_ok() {
@@ -116,27 +140,7 @@ impl Device<'static> {
             }
 
             self.ports.lock_irq()[nport as usize] = Some(port.clone());
-            if let Err(e) = (|| -> KResult<()> {
-                port.init()?;
-
-                {
-                    let port = port.clone();
-                    let name = format!("ahci-p{}-stats", port.nport);
-                    procfs::populate_root(name.into_bytes().into(), move |buffer| {
-                        port.print_stats(&mut buffer.get_writer())
-                    })?;
-                }
-
-                let port = BlockDevice::register_disk(
-                    make_device(8, nport * 16),
-                    2147483647, // TODO: get size from device
-                    port,
-                )?;
-
-                block_on(port.partprobe())?;
-
-                Ok(())
-            })() {
+            if let Err(e) = self.probe_port(port).await {
                 self.ports.lock_irq()[nport as usize] = None;
                 println_warn!("probe port {nport} failed with {e}");
             }
@@ -154,6 +158,7 @@ impl AHCIDriver {
     }
 }
 
+#[async_trait]
 impl PCIDriver for AHCIDriver {
     fn vendor_id(&self) -> u16 {
         VENDOR_INTEL
@@ -163,7 +168,7 @@ impl PCIDriver for AHCIDriver {
         DEVICE_AHCI
     }
 
-    fn handle_device(&self, pcidev: Arc<PCIDevice<'static>>) -> Result<(), PciError> {
+    async fn handle_device(&self, pcidev: Arc<PCIDevice<'static>>) -> Result<(), PciError> {
         let Header::Endpoint(header) = pcidev.header else {
             Err(EINVAL)?
         };
@@ -200,7 +205,7 @@ impl PCIDriver for AHCIDriver {
         let device_irq = device.clone();
         register_irq_handler(irqno as i32, move || device_irq.handle_interrupt())?;
 
-        device.probe_ports()?;
+        device.probe_ports().await?;
 
         self.devices.lock().push(device);
 
@@ -208,6 +213,8 @@ impl PCIDriver for AHCIDriver {
     }
 }
 
-pub fn register_ahci_driver() {
-    pcie::register_driver(AHCIDriver::new()).expect("Register ahci driver failed");
+pub async fn register_ahci_driver() {
+    pcie::register_driver(AHCIDriver::new())
+        .await
+        .expect("Register ahci driver failed");
 }

+ 28 - 20
src/driver/ahci/port.rs

@@ -9,9 +9,9 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue};
 use crate::kernel::constants::{EINVAL, EIO};
 use crate::kernel::mem::paging::Page;
 use crate::kernel::mem::AsMemoryBlock as _;
-use crate::kernel::task::block_on;
 use crate::prelude::*;
 use alloc::collections::vec_deque::VecDeque;
+use async_trait::async_trait;
 use core::pin::pin;
 use eonix_mm::address::{Addr as _, PAddr};
 use eonix_sync::{SpinIrq as _, WaitList};
@@ -145,18 +145,25 @@ impl AdapterPort<'_> {
         self.sata_status().read_once() & 0xf == 0x3
     }
 
-    fn get_free_slot(&self) -> u32 {
+    async fn get_free_slot(&self) -> u32 {
         loop {
-            let mut free_list = self.free_list.lock_irq();
-            let free_slot = free_list.free.pop_front();
-            if let Some(slot) = free_slot {
-                return slot;
-            }
             let mut wait = pin!(self.free_list_wait.prepare_to_wait());
-            wait.as_mut().add_to_wait_list();
-            drop(free_list);
 
-            block_on(wait);
+            {
+                let mut free_list = self.free_list.lock_irq();
+
+                if let Some(slot) = free_list.free.pop_front() {
+                    return slot;
+                }
+
+                wait.as_mut().add_to_wait_list();
+
+                if let Some(slot) = free_list.free.pop_front() {
+                    return slot;
+                }
+            }
+
+            wait.await;
         }
     }
 
@@ -204,11 +211,11 @@ impl AdapterPort<'_> {
         Ok(())
     }
 
-    fn send_command(&self, cmd: &impl Command) -> KResult<()> {
+    async fn send_command(&self, cmd: &impl Command) -> KResult<()> {
         let mut cmdtable = CommandTable::new();
         cmdtable.setup(cmd);
 
-        let slot_index = self.get_free_slot();
+        let slot_index = self.get_free_slot().await;
         let slot = &self.slots[slot_index as usize];
 
         slot.prepare_command(&cmdtable, cmd.write());
@@ -222,7 +229,7 @@ impl AdapterPort<'_> {
 
         self.stats.inc_cmd_sent();
 
-        if let Err(_) = block_on(slot.wait_finish()) {
+        if let Err(_) = slot.wait_finish().await {
             self.stats.inc_cmd_error();
             return Err(EIO);
         };
@@ -231,16 +238,16 @@ impl AdapterPort<'_> {
         Ok(())
     }
 
-    fn identify(&self) -> KResult<()> {
+    async fn identify(&self) -> KResult<()> {
         let cmd = IdentifyCommand::new();
 
         // TODO: check returned data
-        self.send_command(&cmd)?;
+        self.send_command(&cmd).await?;
 
         Ok(())
     }
 
-    pub fn init(&self) -> KResult<()> {
+    pub async fn init(&self) -> KResult<()> {
         self.stop_command()?;
 
         self.command_list_base()
@@ -251,7 +258,7 @@ impl AdapterPort<'_> {
 
         self.start_command()?;
 
-        match self.identify() {
+        match self.identify().await {
             Err(err) => {
                 self.stop_command()?;
                 Err(err)
@@ -269,12 +276,13 @@ impl AdapterPort<'_> {
     }
 }
 
+#[async_trait]
 impl BlockRequestQueue for AdapterPort<'_> {
     fn max_request_pages(&self) -> u64 {
         1024
     }
 
-    fn submit(&self, req: BlockDeviceRequest) -> KResult<()> {
+    async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> {
         match req {
             BlockDeviceRequest::Read {
                 sector,
@@ -287,7 +295,7 @@ impl BlockRequestQueue for AdapterPort<'_> {
 
                 let command = ReadLBACommand::new(buffer, sector, count as u16)?;
 
-                self.send_command(&command)
+                self.send_command(&command).await
             }
             BlockDeviceRequest::Write {
                 sector,
@@ -300,7 +308,7 @@ impl BlockRequestQueue for AdapterPort<'_> {
 
                 let command = WriteLBACommand::new(buffer, sector, count as u16)?;
 
-                self.send_command(&command)
+                self.send_command(&command).await
             }
         }
     }

+ 10 - 9
src/driver/ahci/slot.rs

@@ -71,19 +71,20 @@ impl<'a> CommandSlot<'a> {
 
     pub async fn wait_finish(&self) -> KResult<()> {
         let mut inner = loop {
-            let inner = self.inner.lock_irq();
-            if inner.state != SlotState::Working {
-                break inner;
-            }
-
             let mut wait = pin!(self.wait_list.prepare_to_wait());
-            wait.as_mut().add_to_wait_list();
 
-            if inner.state != SlotState::Working {
-                break inner;
+            {
+                let inner = self.inner.lock_irq();
+                if inner.state != SlotState::Working {
+                    break inner;
+                }
+                wait.as_mut().add_to_wait_list();
+
+                if inner.state != SlotState::Working {
+                    break inner;
+                }
             }
 
-            drop(inner);
             wait.await;
         };
 

+ 5 - 3
src/driver/e1000e.rs

@@ -8,6 +8,7 @@ use crate::prelude::*;
 use alloc::boxed::Box;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
+use async_trait::async_trait;
 use core::ptr::NonNull;
 use eonix_hal::fence::memory_barrier;
 use eonix_mm::address::{Addr, PAddr};
@@ -437,6 +438,7 @@ struct Driver {
     dev_id: u16,
 }
 
+#[async_trait]
 impl PCIDriver for Driver {
     fn vendor_id(&self) -> u16 {
         0x8086
@@ -446,7 +448,7 @@ impl PCIDriver for Driver {
         self.dev_id
     }
 
-    fn handle_device(&self, device: Arc<PCIDevice<'static>>) -> Result<(), PciError> {
+    async fn handle_device(&self, device: Arc<PCIDevice<'static>>) -> Result<(), PciError> {
         let Header::Endpoint(header) = device.header else {
             Err(EINVAL)?
         };
@@ -473,10 +475,10 @@ impl PCIDriver for Driver {
     }
 }
 
-pub fn register_e1000e_driver() {
+pub async fn register_e1000e_driver() {
     let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a];
 
     for id in dev_ids.into_iter() {
-        pcie::register_driver(Driver { dev_id: id }).unwrap();
+        pcie::register_driver(Driver { dev_id: id }).await.unwrap();
     }
 }

+ 3 - 3
src/driver/serial.rs

@@ -2,8 +2,8 @@ mod io;
 
 use crate::{
     kernel::{
-        block::make_device, console::set_console, constants::EIO, interrupt::register_irq_handler,
-        CharDevice, CharDeviceType, Terminal, TerminalDevice,
+        console::set_console, constants::EIO, interrupt::register_irq_handler,
+        vfs::types::DeviceId, CharDevice, CharDeviceType, Terminal, TerminalDevice,
     },
     prelude::*,
 };
@@ -167,7 +167,7 @@ impl Serial {
         eonix_log::set_console(terminal.clone());
 
         CharDevice::register(
-            make_device(4, 64 + port.id),
+            DeviceId::new(4, 64 + port.id as u16),
             port.name.clone(),
             CharDeviceType::Terminal(terminal),
         )?;

+ 2 - 5
src/driver/virtio/riscv64.rs

@@ -1,8 +1,5 @@
 use super::virtio_blk::HAL;
-use crate::kernel::{
-    block::{make_device, BlockDevice},
-    task::block_on,
-};
+use crate::kernel::{block::BlockDevice, task::block_on, vfs::types::DeviceId};
 use alloc::{sync::Arc, vec::Vec};
 use eonix_hal::arch_exported::fdt::FDT;
 use eonix_hal::mm::ArchPhysAccess;
@@ -43,7 +40,7 @@ pub fn init() {
                         .expect("Failed to initialize VirtIO Block device");
 
                     let block_device = BlockDevice::register_disk(
-                        make_device(8, 16 * disk_id),
+                        DeviceId::new(8, 16 * disk_id),
                         2147483647,
                         Arc::new(Spin::new(block_device)),
                     )

+ 4 - 1
src/driver/virtio/virtio_blk.rs

@@ -7,6 +7,8 @@ use crate::{
     },
     prelude::KResult,
 };
+use alloc::boxed::Box;
+use async_trait::async_trait;
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::{
     address::{Addr, PAddr, PhysAccess},
@@ -74,6 +76,7 @@ unsafe impl Hal for HAL {
     }
 }
 
+#[async_trait]
 impl<T> BlockRequestQueue for Spin<VirtIOBlk<HAL, T>>
 where
     T: Transport + Send,
@@ -82,7 +85,7 @@ where
         1024
     }
 
-    fn submit(&self, req: BlockDeviceRequest) -> KResult<()> {
+    async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()> {
         match req {
             BlockDeviceRequest::Write {
                 sector,

+ 18 - 17
src/fs/ext4.rs

@@ -1,5 +1,3 @@
-use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};
-
 use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend};
 use crate::kernel::task::block_on;
 use crate::kernel::timer::Ticks;
@@ -31,6 +29,8 @@ use alloc::{
 use another_ext4::{
     Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId,
 };
+use async_trait::async_trait;
+use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 use eonix_sync::RwLock;
 
 pub struct Ext4BlockDevice {
@@ -194,7 +194,7 @@ impl Ext4Fs {
                         root_inode.inode.mtime_extra() as _,
                     )),
                     rwsem: RwLock::new(()),
-                    vfs: Arc::downgrade(&ext4fs) as _,
+                    sb: Arc::downgrade(&ext4fs) as _,
                 },
             )
         };
@@ -290,7 +290,7 @@ impl Inode for FileInode {
     }
 
     fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let mut temp_buf = vec![0u8; buffer.total()];
@@ -334,7 +334,7 @@ impl Inode for FileInode {
     fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
         //let _lock = Task::block_on(self.rwsem.write());
 
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let mut temp_buf = vec![0u8; 4096];
@@ -363,7 +363,7 @@ impl Inode for FileInode {
     fn chmod(&self, mode: Mode) -> KResult<()> {
         let _lock = block_on(self.rwsem.write());
 
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
         let old_mode = self.mode.load();
         let new_mode = old_mode.perm(mode.bits());
@@ -428,7 +428,7 @@ impl DirInode {
 
 impl Inode for DirInode {
     fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let name = dentry.get_name();
@@ -477,7 +477,7 @@ impl Inode for DirInode {
                 ctime: Spin::new(Instant::new(attr.ctime as _, 0)),
                 mtime: Spin::new(Instant::new(attr.mtime as _, 0)),
                 rwsem: RwLock::new(()),
-                vfs: self.vfs.clone(),
+                sb: self.sb.clone(),
             },
         );
 
@@ -489,7 +489,7 @@ impl Inode for DirInode {
         offset: usize,
         callback: &mut dyn FnMut(&[u8], Ino) -> KResult<core::ops::ControlFlow<(), ()>>,
     ) -> KResult<usize> {
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let entries = ext4fs
@@ -519,7 +519,7 @@ impl Inode for DirInode {
     fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
         let _lock = block_on(self.rwsem.write());
 
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let name = at.get_name();
@@ -534,7 +534,7 @@ impl Inode for DirInode {
             )
             .unwrap();
 
-        let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode);
+        let file = FileInode::new(new_ino as u64, self.sb.clone(), mode);
         let now = Instant::now();
         self.update_child_time(file.as_ref(), now);
         self.link_file();
@@ -547,7 +547,7 @@ impl Inode for DirInode {
     fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> {
         let _lock = block_on(self.rwsem.write());
 
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let name = at.get_name();
@@ -562,7 +562,7 @@ impl Inode for DirInode {
             )
             .unwrap();
 
-        let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode);
+        let new_dir = DirInode::new(new_ino as u64, self.sb.clone(), mode);
         let now = Instant::now();
         self.update_child_time(new_dir.as_ref(), now);
         self.link_dir();
@@ -575,7 +575,7 @@ impl Inode for DirInode {
     fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
         let _dir_lock = block_on(self.rwsem.write());
 
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let file = at.get_inode()?;
@@ -602,7 +602,7 @@ impl Inode for DirInode {
     fn chmod(&self, mode: Mode) -> KResult<()> {
         let _lock = block_on(self.rwsem.write());
 
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
         let old_mode = self.mode.load();
         let new_mode = old_mode.perm(mode.bits());
@@ -638,7 +638,7 @@ impl Inode for DirInode {
 
         // TODO: may need another lock
         let _lock = block_on(self.rwsem.write());
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = self.sb.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let old_file = old_dentry.get_inode()?;
@@ -698,6 +698,7 @@ impl From<InodeMode> for Mode {
 
 struct Ext4MountCreator;
 
+#[async_trait]
 impl MountCreator for Ext4MountCreator {
     fn check_signature(&self, mut first_block: &[u8]) -> KResult<bool> {
         match first_block.split_off(1080..) {
@@ -707,7 +708,7 @@ impl MountCreator for Ext4MountCreator {
         }
     }
 
-    fn create_mount(&self, source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
+    async fn create_mount(&self, source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
         let source = source.as_bytes();
 
         let path = Path::new(source)?;

+ 337 - 230
src/fs/fat32.rs

@@ -1,41 +1,72 @@
 mod dir;
 mod file;
 
-use crate::io::Stream;
-use crate::kernel::constants::EIO;
+use core::future::Future;
+use core::ops::Deref;
+
+use alloc::sync::{Arc, Weak};
+use async_trait::async_trait;
+use dir::{as_raw_dirents, ParseDirent};
+use eonix_sync::RwLock;
+use itertools::Itertools;
+
+use crate::kernel::constants::{EINVAL, EIO};
 use crate::kernel::mem::{AsMemoryBlock, CachePageStream};
-use crate::kernel::task::block_on;
-use crate::kernel::vfs::inode::{Mode, WriteOffset};
+use crate::kernel::timer::Instant;
+use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse};
+use crate::kernel::vfs::types::{DeviceId, Format, Permission};
+use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo};
+use crate::prelude::*;
 use crate::{
     io::{Buffer, ByteBuffer, UninitBuffer},
     kernel::{
-        block::{make_device, BlockDevice, BlockDeviceRequest},
+        block::{BlockDevice, BlockDeviceRequest},
         mem::{
             paging::Page,
-            {CachePage, PageCache, PageCacheBackend},
+            {CachePage, PageCache, PageCacheBackendOps},
         },
         vfs::{
             dentry::Dentry,
-            inode::{define_struct_inode, Ino, Inode, InodeData},
+            inode::{Ino, Inode},
             mount::{register_filesystem, Mount, MountCreator},
-            vfs::Vfs,
-            DevId,
         },
     },
-    prelude::*,
     KResult,
 };
-use alloc::{
-    collections::btree_map::BTreeMap,
-    sync::{Arc, Weak},
-    vec::Vec,
-};
-use core::{ops::ControlFlow, sync::atomic::Ordering};
-use dir::Dirs as _;
-use eonix_sync::RwLock;
-use file::ClusterRead;
 
-type ClusterNo = u32;
+#[repr(transparent)]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+struct Cluster(u32);
+
+#[repr(transparent)]
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+struct RawCluster(pub u32);
+
+impl RawCluster {
+    const START: u32 = 2;
+    const EOC: u32 = 0x0FFF_FFF8;
+    const INVL: u32 = 0xF000_0000;
+
+    fn parse(self) -> Option<Cluster> {
+        match self.0 {
+            ..Self::START | Self::EOC..Self::INVL => None,
+            Self::INVL.. => {
+                unreachable!("invalid cluster number: RawCluster({:#08x})", self.0)
+            }
+            no => Some(Cluster(no)),
+        }
+    }
+}
+
+impl Cluster {
+    pub fn as_ino(self) -> Ino {
+        Ino::new(self.0 as _)
+    }
+
+    fn normalized(self) -> Self {
+        Self(self.0 - 2)
+    }
+}
 
 const SECTOR_SIZE: usize = 512;
 
@@ -59,7 +90,7 @@ struct Bootsector {
     sectors_per_fat: u32,
     flags: u16,
     fat_version: u16,
-    root_cluster: ClusterNo,
+    root_cluster: RawCluster,
     fsinfo_sector: u16,
     backup_bootsector: u16,
     _reserved: [u8; 12],
@@ -73,222 +104,203 @@ struct Bootsector {
     mbr_signature: u16,
 }
 
-impl_any!(FatFs);
 /// # Lock order
 /// 2. FatTable
 /// 3. Inodes
 ///
 struct FatFs {
+    // Number of sectors that make up one cluster.
     sectors_per_cluster: u8,
-    rootdir_cluster: ClusterNo,
-    data_start: u64,
-    volume_label: [u8; 11],
+    // Base sector for cluster reads; `read_cluster` adds the rebased cluster
+    // number times `sectors_per_cluster` to it.
+    data_start_sector: u64,
+    // Stored at mount time; not read by the code in this view.
+    _rootdir_cluster: Cluster,
+    _volume_label: Box<str>,
 
+    // Block device every read request is committed to.
     device: Arc<BlockDevice>,
-    fat: RwLock<Vec<ClusterNo>>,
-    weak: Weak<FatFs>,
-    icache: BTreeMap<Ino, FatInode>,
+    // In-memory copy of the file allocation table, indexed by cluster number.
+    fat: RwLock<Box<[RawCluster]>>,
 }
 
-impl Vfs for FatFs {
-    fn io_blksize(&self) -> usize {
-        4096
-    }
-
-    fn fs_devid(&self) -> DevId {
-        self.device.devid()
-    }
-
-    fn is_read_only(&self) -> bool {
-        true
-    }
-}
+impl SuperBlock for FatFs {}
 
 impl FatFs {
-    fn read_cluster(&self, cluster: ClusterNo, buf: &Page) -> KResult<()> {
-        let cluster = cluster - 2;
+    /// Reads one whole cluster from the block device into `buf`.
+    ///
+    /// The cluster number is rebased with `Cluster::normalized` and turned
+    /// into a sector number relative to `data_start_sector`. An error from
+    /// the block device request is returned to the caller.
+    async fn read_cluster(&self, mut cluster: Cluster, buf: &Page) -> KResult<()> {
+        cluster = cluster.normalized();
 
         let rq = BlockDeviceRequest::Read {
-            sector: self.data_start as u64 + cluster as u64 * self.sectors_per_cluster as u64,
+            sector: self.data_start_sector as u64
+                + cluster.0 as u64 * self.sectors_per_cluster as u64,
             count: self.sectors_per_cluster as u64,
             buffer: core::slice::from_ref(buf),
         };
-        self.device.commit_request(rq)?;
 
+        self.device.commit_request(rq).await?;
         Ok(())
     }
-
-    fn get_or_alloc_inode(&self, ino: Ino, is_directory: bool, size: u32) -> Arc<dyn Inode> {
-        self.icache
-            .get(&ino)
-            .cloned()
-            .map(FatInode::unwrap)
-            .unwrap_or_else(|| {
-                if is_directory {
-                    DirInode::new(ino, self.weak.clone(), size)
-                } else {
-                    FileInode::new(ino, self.weak.clone(), size)
-                }
-            })
-    }
 }
 
 impl FatFs {
-    pub fn create(device: DevId) -> KResult<(Arc<Self>, Arc<dyn Inode>)> {
+    pub async fn create(device: DeviceId) -> KResult<(SbUse<Self>, InodeUse<dyn Inode>)> {
         let device = BlockDevice::get(device)?;
-        let mut fatfs_arc = Arc::new_cyclic(|weak: &Weak<FatFs>| Self {
-            device,
-            sectors_per_cluster: 0,
-            rootdir_cluster: 0,
-            data_start: 0,
-            fat: RwLock::new(Vec::new()),
-            weak: weak.clone(),
-            icache: BTreeMap::new(),
-            volume_label: [0; 11],
-        });
 
-        let fatfs = unsafe { Arc::get_mut_unchecked(&mut fatfs_arc) };
-
-        let mut info: UninitBuffer<Bootsector> = UninitBuffer::new();
-        fatfs.device.read_some(0, &mut info)?.ok_or(EIO)?;
+        let mut info = UninitBuffer::<Bootsector>::new();
+        device.read_some(0, &mut info).await?.ok_or(EIO)?;
         let info = info.assume_filled_ref()?;
 
-        fatfs.sectors_per_cluster = info.sectors_per_cluster;
-        fatfs.rootdir_cluster = info.root_cluster;
-        fatfs.data_start =
-            info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64;
-
-        let fat = fatfs.fat.get_mut();
-
-        fat.resize(
-            512 * info.sectors_per_fat as usize / core::mem::size_of::<ClusterNo>(),
-            0,
+        let mut fat = Box::new_uninit_slice(
+            512 * info.sectors_per_fat as usize / core::mem::size_of::<Cluster>(),
         );
 
-        let mut buffer = ByteBuffer::from(fat.as_mut_slice());
-
-        fatfs
-            .device
-            .read_some(info.reserved_sectors as usize * 512, &mut buffer)?
+        device
+            .read_some(
+                info.reserved_sectors as usize * 512,
+                &mut ByteBuffer::from(fat.as_mut()),
+            )
+            .await?
             .ok_or(EIO)?;
 
-        info.volume_label
-            .iter()
-            .take_while(|&&c| c != ' ' as u8)
-            .take(11)
-            .enumerate()
-            .for_each(|(idx, c)| fatfs.volume_label[idx] = *c);
+        let sectors_per_cluster = info.sectors_per_cluster;
+        let rootdir_cluster = info.root_cluster.parse().ok_or(EINVAL)?;
 
-        let root_dir_cluster_count = ClusterIterator::new(fat, fatfs.rootdir_cluster).count();
-        let root_dir_size = root_dir_cluster_count as u32 * info.sectors_per_cluster as u32 * 512;
+        let data_start_sector =
+            info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64;
+
+        let volume_label = {
+            let end = info
+                .volume_label
+                .iter()
+                .position(|&c| c == b' ')
+                .unwrap_or(info.volume_label.len());
+
+            String::from_utf8_lossy(&info.volume_label[..end])
+                .into_owned()
+                .into_boxed_str()
+        };
 
-        let root_inode = DirInode::new(
-            (info.root_cluster & !0xF000_0000) as Ino,
-            fatfs.weak.clone(),
-            root_dir_size,
+        let fat = unsafe { fat.assume_init() };
+
+        let rootdir_cluster_count = ClusterIterator::new(fat.as_ref(), rootdir_cluster).count();
+        let rootdir_size = rootdir_cluster_count as u32 * sectors_per_cluster as u32 * 512;
+
+        let fatfs = SbUse::new(
+            SuperBlockInfo {
+                io_blksize: 4096,
+                device_id: device.devid(),
+                read_only: true,
+            },
+            Self {
+                device,
+                sectors_per_cluster,
+                _rootdir_cluster: rootdir_cluster,
+                data_start_sector,
+                fat: RwLock::new(fat),
+                _volume_label: volume_label,
+            },
         );
 
-        Ok((fatfs_arc, root_inode))
+        let sbref = SbRef::from(&fatfs);
+        Ok((fatfs, DirInode::new(rootdir_cluster, sbref, rootdir_size)))
     }
 }
 
-struct ClusterIterator<'fat> {
-    fat: &'fat [ClusterNo],
-    cur: ClusterNo,
+/// Walks a cluster chain through a borrowed FAT.
+///
+/// `cur` is the cluster that the next call to `next` will yield, or `None`
+/// once the chain has ended.
+struct ClusterIterator<'a> {
+    fat: &'a [RawCluster],
+    cur: Option<Cluster>,
 }
 
-impl<'fat> ClusterIterator<'fat> {
-    fn new(fat: &'fat [ClusterNo], start: ClusterNo) -> Self {
-        Self { fat, cur: start }
+impl<'a> ClusterIterator<'a> {
+    /// Creates an iterator over the chain that begins at `start`; `start`
+    /// itself is the first cluster yielded.
+    fn new(fat: &'a [RawCluster], start: Cluster) -> Self {
+        Self {
+            fat,
+            cur: Some(start),
+        }
     }
 }
 
 impl<'fat> Iterator for ClusterIterator<'fat> {
-    type Item = ClusterNo;
+    type Item = Cluster;
 
+    /// Yields the current cluster and advances along the chain.
+    ///
+    /// The successor is looked up in the FAT with a checked access: a link
+    /// that points past the end of the table ends the chain instead of
+    /// panicking on an out-of-bounds index.
     fn next(&mut self) -> Option<Self::Item> {
-        const EOC: ClusterNo = 0x0FFF_FFF8;
-        const INVL: ClusterNo = 0xF000_0000;
-
-        match self.cur {
-            ..2 | EOC..INVL => None,
-            INVL.. => unreachable!("Invalid cluster number: {}", self.cur),
-            next => {
-                self.cur = self.fat[next as usize] & !INVL;
-                Some(next)
-            }
-        }
+        let current = self.cur?;
+        self.cur = self
+            .fat
+            .get(current.0 as usize)
+            .copied()
+            .and_then(RawCluster::parse);
+        Some(current)
     }
 }
 
-#[allow(dead_code)]
-#[derive(Clone)]
-enum FatInode {
-    File(Arc<FileInode>),
-    Dir(Arc<DirInode>),
+struct FileInode {
+    cluster: Cluster,
+    info: Spin<InodeInfo>,
+    sb: SbRef<FatFs>,
+    page_cache: PageCache,
 }
 
-impl FatInode {
-    fn unwrap(self) -> Arc<dyn Inode> {
-        match self {
-            FatInode::File(inode) => inode,
-            FatInode::Dir(inode) => inode,
-        }
+impl FileInode {
+    fn new(cluster: Cluster, sb: SbRef<FatFs>, size: u32) -> InodeUse<FileInode> {
+        InodeUse::new_cyclic(|weak: &Weak<FileInode>| Self {
+            cluster,
+            info: Spin::new(InodeInfo {
+                size: size as u64,
+                nlink: 1,
+                uid: 0,
+                gid: 0,
+                perm: Permission::new(0o777),
+                atime: Instant::UNIX_EPOCH,
+                ctime: Instant::UNIX_EPOCH,
+                mtime: Instant::UNIX_EPOCH,
+            }),
+            sb,
+            page_cache: PageCache::new(weak.clone()),
+        })
     }
 }
 
-define_struct_inode! {
-    struct FileInode {
-        page_cache: PageCache,
+impl InodeOps for FileInode {
+    type SuperBlock = FatFs;
+
+    fn ino(&self) -> Ino {
+        self.cluster.as_ino()
     }
-}
 
-impl FileInode {
-    fn new(ino: Ino, weak: Weak<FatFs>, size: u32) -> Arc<Self> {
-        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
-            idata: InodeData::new(ino, weak),
-            page_cache: PageCache::new(weak_self.clone()),
-        });
+    fn format(&self) -> Format {
+        Format::REG
+    }
 
-        // Safety: We are initializing the inode
-        inode.nlink.store(1, Ordering::Relaxed);
-        inode.mode.store(Mode::REG.perm(0o777));
-        inode.size.store(size as u64, Ordering::Relaxed);
+    fn info(&self) -> &Spin<InodeInfo> {
+        &self.info
+    }
 
-        inode
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
     }
-}
 
-impl Inode for FileInode {
     fn page_cache(&self) -> Option<&PageCache> {
         Some(&self.page_cache)
     }
+}
 
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        block_on(self.page_cache.read(buffer, offset))
+impl InodeDirOps for FileInode {}
+impl InodeFileOps for FileInode {
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        self.page_cache.read(buffer, offset).await
     }
 
-    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
-        let fat = block_on(vfs.fat.read());
+    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let sb = self.sb.get()?;
+        let fs = &sb.backend;
+        let fat = sb.backend.fat.read().await;
 
-        if self.size.load(Ordering::Relaxed) as usize == 0 {
+        if offset >= self.info.lock().size as usize {
             return Ok(0);
         }
 
-        let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE;
+        let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE;
         assert!(cluster_size <= 0x1000, "Cluster size is too large");
 
         let skip_clusters = offset / cluster_size;
         let inner_offset = offset % cluster_size;
 
-        let cluster_iter =
-            ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).skip(skip_clusters);
+        let cluster_iter = ClusterIterator::new(fat.as_ref(), self.cluster).skip(skip_clusters);
 
         let buffer_page = Page::alloc();
         for cluster in cluster_iter {
-            vfs.read_cluster(cluster, &buffer_page)?;
+            fs.read_cluster(cluster, &buffer_page).await?;
 
             let data = unsafe {
                 // SAFETY: We are the only one holding this page.
@@ -296,7 +308,7 @@ impl Inode for FileInode {
             };
 
             let end = offset + data.len();
-            let real_end = core::cmp::min(end, self.size.load(Ordering::Relaxed) as usize);
+            let real_end = end.min(self.info.lock().size as usize);
             let real_size = real_end - offset;
 
             if buffer.fill(&data[..real_size])?.should_stop() {
@@ -306,108 +318,203 @@ impl Inode for FileInode {
 
         Ok(buffer.wrote())
     }
+}
 
-    fn write(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult<usize> {
-        todo!()
+/// Lets the page cache use a FAT file inode as its backing store.
+impl PageCacheBackendOps for FileInode {
+    /// Fills `page` by forwarding to `read_direct` at the given offset.
+    async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
+        self.read_direct(page, offset).await
     }
 
-    fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult<usize> {
+    /// Not implemented yet; calling it panics through `todo!()`.
+    async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
         todo!()
     }
+
+    /// Current size taken from the inode info (takes the info spinlock).
+    fn size(&self) -> usize {
+        self.info.lock().size as usize
+    }
 }
 
-impl PageCacheBackend for FileInode {
-    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
-        self.read_direct(page, offset)
+/// A FAT directory inode.
+struct DirInode {
+    // First cluster of the directory; also used as the inode number.
+    cluster: Cluster,
+    info: Spin<InodeInfo>,
+    sb: SbRef<FatFs>,
+
+    // Raw directory data, one page per cluster, filled on first use by
+    // `read_dir_pages`. An empty vector means "not loaded yet".
+    // TODO: Use the new PageCache...
+    dir_pages: RwLock<Vec<Page>>,
+}
+
+impl DirInode {
+    /// Builds a directory inode for the chain starting at `cluster`.
+    ///
+    /// Ownership is root (uid/gid 0), permissions are `0o777` and all
+    /// timestamps are the Unix epoch. The directory contents are not read
+    /// here; the page list starts out empty.
+    fn new(cluster: Cluster, sb: SbRef<FatFs>, size: u32) -> InodeUse<Self> {
+        InodeUse::new(Self {
+            cluster,
+            info: Spin::new(InodeInfo {
+                size: size as u64,
+                nlink: 2, // '.' and '..'
+                uid: 0,
+                gid: 0,
+                perm: Permission::new(0o777),
+                atime: Instant::UNIX_EPOCH,
+                ctime: Instant::UNIX_EPOCH,
+                mtime: Instant::UNIX_EPOCH,
+            }),
+            sb,
+            dir_pages: RwLock::new(Vec::new()),
+        })
     }
 
-    fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
-        todo!()
+    /// Loads every cluster of the directory into `dir_pages`.
+    ///
+    /// Does nothing when the pages are already present. The pages are
+    /// collected in a local vector and published only after every cluster
+    /// was read, so a failed read never leaves a partially filled cache
+    /// behind that later calls would mistake for a complete directory.
+    async fn read_dir_pages(&self) -> KResult<()> {
+        let mut dir_pages = self.dir_pages.write().await;
+        if !dir_pages.is_empty() {
+            // Someone else loaded the directory while we waited for the lock.
+            return Ok(());
+        }
+
+        let sb = self.sb.get()?;
+        let fs = &sb.backend;
+        let fat = fs.fat.read().await;
+
+        let clusters = ClusterIterator::new(fat.as_ref(), self.cluster);
+
+        let mut loaded = Vec::new();
+        for cluster in clusters {
+            let page = Page::alloc();
+            fs.read_cluster(cluster, &page).await?;
+
+            loaded.push(page);
+        }
+
+        *dir_pages = loaded;
+        Ok(())
     }
 
-    fn size(&self) -> usize {
-        self.size.load(Ordering::Relaxed) as usize
+    /// Returns a read guard over the directory pages, loading them first if
+    /// the list is still empty.
+    async fn get_dir_pages(&self) -> KResult<impl Deref<Target = Vec<Page>> + use<'_>> {
+        {
+            // Fast path: already loaded. The guard is dropped at the end of
+            // this scope so that `read_dir_pages` can take the write lock.
+            let dir_pages = self.dir_pages.read().await;
+            if !dir_pages.is_empty() {
+                return Ok(dir_pages);
+            }
+        }
+
+        self.read_dir_pages().await?;
+
+        if let Some(dir_pages) = self.dir_pages.try_read() {
+            return Ok(dir_pages);
+        }
+
+        Ok(self.dir_pages.read().await)
     }
 }
 
-define_struct_inode! {
-    struct DirInode;
-}
+impl InodeOps for DirInode {
+    type SuperBlock = FatFs;
 
-impl DirInode {
-    fn new(ino: Ino, weak: Weak<FatFs>, size: u32) -> Arc<Self> {
-        let inode = Arc::new(Self {
-            idata: InodeData::new(ino, weak),
-        });
+    fn ino(&self) -> Ino {
+        self.cluster.as_ino()
+    }
+
+    fn format(&self) -> Format {
+        Format::DIR
+    }
 
-        // Safety: We are initializing the inode
-        inode.nlink.store(2, Ordering::Relaxed);
-        inode.mode.store(Mode::DIR.perm(0o777));
-        inode.size.store(size as u64, Ordering::Relaxed);
+    fn info(&self) -> &Spin<InodeInfo> {
+        &self.info
+    }
 
-        inode
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
+    }
+
+    fn page_cache(&self) -> Option<&PageCache> {
+        None
     }
 }
 
-impl Inode for DirInode {
-    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
-        let fat = block_on(vfs.fat.read());
-
-        let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo)
-            .read(vfs, 0)
-            .dirs();
-
-        let entry = entries.find(|entry| {
-            entry
-                .as_ref()
-                .map(|entry| &entry.filename == &***dentry.name())
-                .unwrap_or(true)
+impl InodeFileOps for DirInode {}
+impl InodeDirOps for DirInode {
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>> {
+        let sb = self.sb.get()?;
+        let dir_pages = self.get_dir_pages().await?;
+
+        let dir_data = dir_pages.iter().map(|page| {
+            unsafe {
+                // SAFETY: No one could be writing to it.
+                page.as_memblk().as_bytes()
+            }
         });
 
-        match entry {
-            None => Ok(None),
-            Some(Err(err)) => Err(err),
-            Some(Ok(entry)) => Ok(Some(vfs.get_or_alloc_inode(
-                entry.cluster as Ino,
-                entry.is_directory,
-                entry.size,
-            ))),
+        let raw_dirents = dir_data
+            .map(as_raw_dirents)
+            .take_while_inclusive(Result::is_ok)
+            .flatten_ok();
+
+        let mut dirents = futures::stream::iter(raw_dirents);
+
+        while let Some(result) = dirents.next_dirent().await {
+            let entry = result?;
+
+            if *entry.filename != ****dentry.name() {
+                continue;
+            }
+
+            let sbref = SbRef::from(&sb);
+
+            if entry.is_directory {
+                return Ok(Some(DirInode::new(entry.cluster, sbref, entry.size) as _));
+            } else {
+                return Ok(Some(FileInode::new(entry.cluster, sbref, entry.size) as _));
+            }
         }
+
+        Ok(None)
     }
 
-    fn do_readdir(
-        &self,
+    fn readdir<'r, 'a: 'r, 'b: 'r>(
+        &'a self,
         offset: usize,
-        callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
-    ) -> KResult<usize> {
-        let vfs = self.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
-        let fat = block_on(vfs.fat.read());
-
-        let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo)
-            .read(vfs, offset)
-            .dirs();
-
-        let mut nread = 0usize;
-        for entry in cluster_iter {
-            let entry = entry?;
-
-            vfs.get_or_alloc_inode(entry.cluster as Ino, entry.is_directory, entry.size);
-            if callback(&entry.filename, entry.cluster as Ino)?.is_break() {
-                break;
+        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> impl Future<Output = KResult<KResult<usize>>> + Send + 'r {
+        async move {
+            let sb = self.sb.get()?;
+            let fs = &sb.backend;
+            let dir_pages = self.get_dir_pages().await?;
+
+            let cluster_size = fs.sectors_per_cluster as usize * SECTOR_SIZE;
+
+            let cluster_offset = offset / cluster_size;
+            let inner_offset = offset % cluster_size;
+            let inner_raw_dirent_offset = inner_offset / core::mem::size_of::<dir::RawDirEntry>();
+
+            let dir_data = dir_pages.iter().skip(cluster_offset).map(|page| {
+                unsafe {
+                    // SAFETY: No one could be writing to it.
+                    page.as_memblk().as_bytes()
+                }
+            });
+
+            let raw_dirents = dir_data
+                .map(as_raw_dirents)
+                .take_while_inclusive(Result::is_ok)
+                .flatten_ok()
+                .skip(inner_raw_dirent_offset);
+
+            let mut dirents = futures::stream::iter(raw_dirents);
+
+            let mut nread = 0;
+            while let Some(result) = dirents.next_dirent().await {
+                let entry = result?;
+
+                match callback(&entry.filename, entry.cluster.as_ino()) {
+                    Err(err) => return Ok(Err(err)),
+                    Ok(true) => nread += entry.entry_offset as usize,
+                    Ok(false) => break,
+                }
             }
 
-            nread += entry.entry_offset as usize;
+            Ok(Ok(nread))
         }
-
-        Ok(nread)
     }
 }
 
 struct FatMountCreator;
 
+#[async_trait]
 impl MountCreator for FatMountCreator {
     fn check_signature(&self, mut first_block: &[u8]) -> KResult<bool> {
         match first_block.split_off(82..) {
@@ -417,8 +524,8 @@ impl MountCreator for FatMountCreator {
         }
     }
 
-    fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
-        let (fatfs, root_inode) = FatFs::create(make_device(8, 1))?;
+    async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
+        let (fatfs, root_inode) = FatFs::create(DeviceId::new(8, 1)).await?;
 
         Mount::new(mp, fatfs, root_inode)
     }

+ 98 - 116
src/fs/fat32/dir.rs

@@ -1,11 +1,16 @@
-use super::file::ClusterReadIterator;
+use core::pin::Pin;
+
+use alloc::{boxed::Box, string::String};
+use futures::{Stream, StreamExt};
+use posix_types::result::PosixError;
+
 use crate::kernel::constants::EINVAL;
 use crate::prelude::*;
-use alloc::{string::String, sync::Arc};
-use itertools::Itertools;
+
+use super::{Cluster, RawCluster};
 
 #[repr(C, packed)]
-pub(super) struct RawDirEntry {
+pub struct RawDirEntry {
     name: [u8; 8],
     extension: [u8; 3],
     attr: u8,
@@ -21,9 +26,9 @@ pub(super) struct RawDirEntry {
     size: u32,
 }
 
-pub(super) struct FatDirectoryEntry {
-    pub filename: Arc<[u8]>,
-    pub cluster: u32,
+pub struct FatDirectoryEntry {
+    pub filename: Box<[u8]>,
+    pub cluster: Cluster,
     pub size: u32,
     pub entry_offset: u32,
     pub is_directory: bool,
@@ -79,7 +84,7 @@ impl RawDirEntry {
         self.attr & Self::ATTR_DIRECTORY != 0
     }
 
-    fn long_filename(&self) -> Option<[u16; 13]> {
+    fn as_raw_long_filename(&self) -> Option<[u16; 13]> {
         if !self.is_long_filename() {
             return None;
         }
@@ -103,137 +108,114 @@ impl RawDirEntry {
     }
 }
 
-impl<'data, I> RawDirs<'data> for I where I: ClusterReadIterator<'data> {}
-trait RawDirs<'data>: ClusterReadIterator<'data> {
-    fn raw_dirs(self) -> impl Iterator<Item = KResult<&'data RawDirEntry>> + 'data
-    where
-        Self: Sized,
-    {
-        const ENTRY_SIZE: usize = size_of::<RawDirEntry>();
-
-        self.map(|result| {
-            let data = result?;
-            if data.len() % ENTRY_SIZE != 0 {
-                return Err(EINVAL);
-            }
-
-            Ok(unsafe {
-                core::slice::from_raw_parts(
-                    data.as_ptr() as *const RawDirEntry,
-                    data.len() / ENTRY_SIZE,
-                )
-            })
-        })
-        .flatten_ok()
+/// Reinterprets a byte slice as a slice of raw directory entries.
+///
+/// Returns `EINVAL` when the length of `data` is not a whole multiple of
+/// `size_of::<RawDirEntry>()`.
+pub fn as_raw_dirents(data: &[u8]) -> KResult<&[RawDirEntry]> {
+    let len = data.len();
+    if len % size_of::<RawDirEntry>() != 0 {
+        return Err(EINVAL);
     }
-}
-
-pub(super) trait Dirs<'data>: ClusterReadIterator<'data> {
-    fn dirs(self) -> impl Iterator<Item = KResult<FatDirectoryEntry>> + 'data
-    where
-        Self: Sized;
-}
 
-impl<'data, I> Dirs<'data> for I
-where
-    I: ClusterReadIterator<'data>,
-{
-    fn dirs(self) -> impl Iterator<Item = KResult<FatDirectoryEntry>> + 'data
-    where
-        Self: Sized,
-    {
-        self.raw_dirs().real_dirs()
+    // SAFETY: `RawDirEntry` is `repr(C, packed)`, so it has no alignment
+    // requirement, and `len` was checked above to cover a whole number of
+    // entries. The returned slice borrows `data`, so it cannot outlive it.
+    // NOTE(review): this assumes every bit pattern is a valid `RawDirEntry`;
+    // the fields visible here are integers and byte arrays — confirm for the
+    // rest of the struct.
+    unsafe {
+        Ok(core::slice::from_raw_parts(
+            data.as_ptr() as *const RawDirEntry,
+            len / size_of::<RawDirEntry>(),
+        ))
     }
 }
 
-trait RealDirs<'data>: Iterator<Item = KResult<&'data RawDirEntry>> + 'data {
-    fn real_dirs(self) -> DirsIter<'data, Self>
-    where
-        Self: Sized;
+/// Pulls parsed directory entries out of a source of raw FAT entries.
+pub trait ParseDirent {
+    /// Consumes as many raw entries as one directory entry needs and returns
+    /// it; `None` means the source is exhausted.
+    async fn next_dirent(&mut self) -> Option<KResult<FatDirectoryEntry>>;
 }
 
-impl<'data, I> RealDirs<'data> for I
+impl<'a, T> ParseDirent for T
 where
-    I: Iterator<Item = KResult<&'data RawDirEntry>> + 'data,
+    T: Stream<Item = KResult<&'a RawDirEntry>>,
 {
-    fn real_dirs(self) -> DirsIter<'data, Self>
-    where
-        Self: Sized,
-    {
-        DirsIter { iter: self }
-    }
-}
+    /// Reads raw entries until one complete directory entry is assembled.
+    ///
+    /// Entries for which `is_invalid()` holds are skipped, except that the
+    /// ones carrying a long-filename segment contribute to the name. The
+    /// first other entry ends the scan and supplies cluster, size and
+    /// directory flag. When no long filename was collected, the short name
+    /// and extension are used instead.
+    ///
+    /// Returns `None` when the stream ends cleanly, and `Some(Err(EINVAL))`
+    /// when it ends in the middle of a long filename or when the entry's
+    /// cluster number does not parse. Errors from the stream are passed on.
+    async fn next_dirent(&mut self) -> Option<KResult<FatDirectoryEntry>> {
+        // NOTE(review): `T` is not required to be `Unpin`, so this is only
+        // sound if the caller never moves the stream between calls. Consider
+        // adding a `T: Unpin` bound and using `Pin::new` instead.
+        let mut me = unsafe { Pin::new_unchecked(self) };
+
+        // The long filename entries are stored in reverse order.
+        // So we reverse all filename segments and then reverse the whole string at the end.
+        let mut filename_rev = String::new();
+
+        let mut is_lfn = false;
+        let mut nr_entry_scanned = 0;
+        let mut cur_entry;
+
+        loop {
+            match me.as_mut().next().await {
+                Some(Err(err)) => return Some(Err(err)),
+                Some(Ok(ent)) => {
+                    cur_entry = ent;
+                    nr_entry_scanned += 1;
+                }
+                None => {
+                    if is_lfn {
+                        // Unterminated long filename entries are invalid.
+                        return Some(Err(PosixError::EINVAL.into()));
+                    } else {
+                        return None;
+                    }
+                }
+            };
 
-pub(super) struct DirsIter<'data, I>
-where
-    I: Iterator<Item = KResult<&'data RawDirEntry>> + 'data,
-{
-    iter: I,
-}
+            if !cur_entry.is_invalid() {
+                break;
+            }
 
-impl<'data, I> Iterator for DirsIter<'data, I>
-where
-    I: Iterator<Item = KResult<&'data RawDirEntry>> + 'data,
-{
-    type Item = KResult<FatDirectoryEntry>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut filename = String::new();
-        let mut entry_offset = 0;
-        let entry = loop {
-            let entry = match self.iter.next()? {
-                Ok(entry) => entry,
-                Err(err) => return Some(Err(err)),
-            };
-            entry_offset += 1;
-
-            let long_filename = entry.long_filename();
-            if entry.is_invalid() {
-                if let Some(long_filename) = long_filename {
-                    let long_filename = long_filename
-                        .iter()
-                        .position(|&ch| ch == 0)
-                        .map(|pos| &long_filename[..pos])
-                        .unwrap_or(&long_filename);
-
-                    filename.extend(
-                        long_filename
-                            .into_iter()
-                            .map(|&ch| char::from_u32(ch as u32).unwrap_or('?'))
-                            .rev(),
-                    );
-                }
+            let Some(raw_long_filename) = cur_entry.as_raw_long_filename() else {
                 continue;
-            }
-            break entry;
+            };
+
+            // We are processing a long filename entry.
+            is_lfn = true;
+
+            let real_len = raw_long_filename
+                .iter()
+                .position(|&ch| ch == 0)
+                .unwrap_or(raw_long_filename.len());
+
+            // Decode the UTF-16 units in storage order, so that surrogate
+            // pairs are seen high-then-low, and only afterwards reverse the
+            // decoded characters of this segment. A segment holds 13 code
+            // units, hence at most 13 characters.
+            let mut segment = ['\0'; 13];
+            let mut segment_len = 0;
+            for decoded in char::decode_utf16(raw_long_filename.into_iter().take(real_len)) {
+                segment[segment_len] = decoded.unwrap_or('?');
+                segment_len += 1;
+            }
+
+            filename_rev.extend(segment[..segment_len].iter().rev());
+        }
+
+        // From now on, `cur_entry` represents a valid directory entry.
+
+        let raw_cluster =
+            RawCluster(cur_entry.cluster_low as u32 | ((cur_entry.cluster_high as u32) << 16));
+
+        // NOTE(review): FAT32 stores first-cluster 0 for zero-length files and
+        // for `..` entries whose parent is the root directory. Such entries do
+        // not parse and take this error path — confirm that this is intended.
+        let Some(cluster) = raw_cluster.parse() else {
+            return Some(Err(PosixError::EINVAL.into()));
         };
 
-        let filename: Arc<[u8]> = if filename.is_empty() {
-            let mut filename = entry.filename().to_vec();
-            let extension = entry.extension();
+        let filename;
+
+        if filename_rev.is_empty() {
+            let mut name = cur_entry.filename().to_vec();
+            let extension = cur_entry.extension();
             if !extension.is_empty() {
-                filename.push(b'.');
-                filename.extend_from_slice(extension);
+                name.push(b'.');
+                name.extend_from_slice(extension);
             }
 
-            if entry.is_filename_lowercase() {
-                filename.make_ascii_lowercase();
+            if cur_entry.is_filename_lowercase() {
+                name.make_ascii_lowercase();
             }
 
-            filename.into()
+            filename = name.into_boxed_slice();
         } else {
-            let mut bytes = filename.into_bytes();
-            bytes.reverse();
-
-            bytes.into()
-        };
+            // Reverse by character, not by byte: reversing the raw bytes would
+            // scramble every multi-byte UTF-8 sequence in the name.
+            let name: String = filename_rev.chars().rev().collect();
+            filename = name.into_bytes().into_boxed_slice();
+        }
 
         Some(Ok(FatDirectoryEntry {
-            size: entry.size,
-            entry_offset,
+            size: cur_entry.size,
+            entry_offset: nr_entry_scanned * size_of::<RawDirEntry>() as u32,
             filename,
-            cluster: entry.cluster_low as u32 | (((entry.cluster_high & !0xF000) as u32) << 16),
-            is_directory: entry.is_directory(),
+            cluster,
+            is_directory: cur_entry.is_directory(),
         }))
     }
 }

+ 15 - 31
src/fs/fat32/file.rs

@@ -1,40 +1,24 @@
-use super::{ClusterIterator, FatFs};
-use crate::{
-    kernel::mem::{AsMemoryBlock as _, Page},
-    KResult,
-};
-
-pub trait ClusterReadIterator<'data>: Iterator<Item = KResult<&'data [u8]>> + 'data {}
-impl<'a, I> ClusterReadIterator<'a> for I where I: Iterator<Item = KResult<&'a [u8]>> + 'a {}
+use futures::Stream;
 
-pub(super) trait ClusterRead<'data> {
-    fn read<'vfs>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data>
-    where
-        Self: Sized,
-        'vfs: 'data;
-}
+use crate::{kernel::mem::Page, prelude::KResult};
 
-impl<'data, 'fat: 'data> ClusterRead<'data> for ClusterIterator<'fat> {
-    fn read<'vfs: 'data>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> {
-        const SECTOR_SIZE: usize = 512;
+use super::{ClusterIterator, FatFs};
 
-        let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE;
-        assert!(cluster_size <= 0x1000, "Cluster size is too large");
+/// Turns a sequence of clusters into a stream of pages holding their data.
+pub trait ReadClusters {
+    /// Reads each cluster through `fs` and yields one page per cluster, or
+    /// the read error for that cluster.
+    fn read_clusters(self, fs: &FatFs) -> impl Stream<Item = KResult<Page>> + Send;
+}
 
-        let skip_clusters = offset / cluster_size;
-        let mut inner_offset = offset % cluster_size;
+impl ReadClusters for ClusterIterator<'_> {
+    /// Allocates a fresh page for every cluster of the chain and fills it
+    /// with `FatFs::read_cluster`. The stream ends when the iterator does.
+    fn read_clusters(self, fs: &FatFs) -> impl Stream<Item = KResult<Page>> + Send {
+        futures::stream::unfold(self, move |mut me| async {
+            let cluster = me.next()?;
+            let page = Page::alloc();
 
-        // TODO: Use block cache.
-        let buffer_page = Page::alloc();
+            // A failed read is yielded as an item; the iterator is handed
+            // back, so the stream continues with the following cluster.
+            if let Err(err) = fs.read_cluster(cluster, &page).await {
+                return Some((Err(err), me));
+            }
 
-        self.skip(skip_clusters).map(move |cluster| {
-            vfs.read_cluster(cluster, &buffer_page)?;
-            let data = unsafe {
-                // SAFETY: No one could be writing to it.
-                &buffer_page.as_memblk().as_bytes()[inner_offset..]
-            };
-            inner_offset = 0;
-            Ok(data)
+            Some((Ok(page), me))
         })
     }
 }

+ 1 - 2
src/fs/mod.rs

@@ -1,5 +1,4 @@
+// pub mod ext4;
 pub mod fat32;
 pub mod procfs;
-pub mod shm;
 pub mod tmpfs;
-pub mod ext4;

+ 188 - 249
src/fs/procfs.rs

@@ -1,325 +1,264 @@
-use crate::kernel::constants::{EACCES, ENOTDIR};
-use crate::kernel::task::block_on;
+use crate::kernel::constants::{EACCES, EISDIR, ENOTDIR};
 use crate::kernel::timer::Instant;
-use crate::kernel::vfs::inode::{AtomicMode, Mode};
+use crate::kernel::vfs::inode::{InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse};
+use crate::kernel::vfs::types::{DeviceId, Format, Permission};
+use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo};
 use crate::{
     io::Buffer,
     kernel::{
         mem::paging::PageBuffer,
         vfs::{
             dentry::Dentry,
-            inode::{define_struct_inode, AtomicIno, Ino, Inode, InodeData},
+            inode::{Ino, Inode},
             mount::{dump_mounts, register_filesystem, Mount, MountCreator},
-            vfs::Vfs,
-            DevId,
         },
     },
     prelude::*,
 };
-use alloc::sync::{Arc, Weak};
-use core::{ops::ControlFlow, sync::atomic::Ordering};
-use eonix_sync::{AsProof as _, AsProofMut as _, LazyLock, Locked};
-use itertools::Itertools;
-
-#[allow(dead_code)]
-pub trait ProcFsFile: Send + Sync {
-    fn can_read(&self) -> bool {
-        false
-    }
-
-    fn can_write(&self) -> bool {
-        false
-    }
+use alloc::sync::Arc;
+use async_trait::async_trait;
+use core::future::Future;
+use core::sync::atomic::{AtomicU64, Ordering};
+use eonix_sync::{LazyLock, RwLock};
+
+struct Node { // A procfs inode; `kind` decides whether it acts as a file or a directory.
+    ino: Ino, // Inode number, returned by `InodeOps::ino`.
+    sb: SbRef<ProcFs>, // Superblock reference, returned by `InodeOps::super_block`.
+    info: Spin<InodeInfo>, // Spin-wrapped generic attributes, returned by `InodeOps::info`.
+    kind: NodeKind, // File or directory payload.
+}
 
-    fn read(&self, _buffer: &mut PageBuffer) -> KResult<usize> {
-        Err(EACCES)
-    }
+enum NodeKind { // Payload of a `Node`; selects between REG and DIR in `InodeOps::format`.
+    File(FileInode), // Regular file whose content comes from a callback.
+    Dir(DirInode), // Directory holding named child nodes.
+}
 
-    fn write(&self, _buffer: &[u8]) -> KResult<usize> {
-        Err(EACCES)
-    }
+struct FileInode { // File payload of a procfs `Node`.
+    read: Option<Box<dyn Fn(&mut PageBuffer) -> KResult<()> + Send + Sync>>, // Callback that writes the file's content into a buffer; `None` makes reads fail with EACCES.
+    write: Option<()>, // Placeholder; `FileInode::new` always sets this to `None`.
 }
 
-pub enum ProcFsNode {
-    File(Arc<FileInode>),
-    Dir(Arc<DirInode>),
+struct DirInode { // Directory payload of a procfs `Node`.
+    entries: RwLock<Vec<(Arc<[u8]>, InodeUse<Node>)>>, // (name, child inode) pairs behind an async RwLock; scanned linearly by lookup/readdir.
 }
 
-impl ProcFsNode {
-    fn unwrap(&self) -> Arc<dyn Inode> {
-        match self {
-            ProcFsNode::File(inode) => inode.clone(),
-            ProcFsNode::Dir(inode) => inode.clone(),
-        }
-    }
+impl InodeOps for Node { // Generic attribute accessors for procfs nodes.
+    type SuperBlock = ProcFs;
 
     fn ino(&self) -> Ino {
-        match self {
-            ProcFsNode::File(inode) => inode.ino,
-            ProcFsNode::Dir(inode) => inode.ino,
-        }
+        self.ino
     }
-}
 
-define_struct_inode! {
-    pub struct FileInode {
-        file: Box<dyn ProcFsFile>,
-    }
-}
-
-impl FileInode {
-    pub fn new(ino: Ino, vfs: Weak<ProcFs>, file: Box<dyn ProcFsFile>) -> Arc<Self> {
-        let mut mode = Mode::REG;
-        if file.can_read() {
-            mode.set_perm(0o444);
-        }
-        if file.can_write() {
-            mode.set_perm(0o222);
+    fn format(&self) -> Format { // File type is derived from `kind`, not stored separately.
+        match &self.kind {
+            NodeKind::File(_) => Format::REG,
+            NodeKind::Dir(_) => Format::DIR,
         }
+    }
 
-        let mut inode = Self {
-            idata: InodeData::new(ino, vfs),
-            file,
-        };
+    fn info(&self) -> &Spin<InodeInfo> { // Callers lock the returned spinlock to access the attributes.
+        &self.info
+    }
 
-        inode.idata.mode.store(mode);
-        inode.idata.nlink.store(1, Ordering::Relaxed);
-        *inode.ctime.get_mut() = Instant::now();
-        *inode.mtime.get_mut() = Instant::now();
-        *inode.atime.get_mut() = Instant::now();
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
+    }
 
-        Arc::new(inode)
+    fn page_cache(&self) -> Option<&crate::kernel::mem::PageCache> { // procfs nodes never expose a page cache.
+        None
     }
 }
 
-impl Inode for FileInode {
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        if !self.file.can_read() {
+impl InodeFileOps for Node { // File reads: content is regenerated by the node's callback on every call.
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let NodeKind::File(file_inode) = &self.kind else { // Directory nodes cannot be read as files.
+            return Err(EISDIR);
+        };
+
+        let Some(read_fn) = &file_inode.read else { // No read callback installed: report EACCES.
             return Err(EACCES);
-        }
+        };
 
         let mut page_buffer = PageBuffer::new();
-        self.file.read(&mut page_buffer)?;
+        read_fn(&mut page_buffer)?; // The callback writes into a fresh scratch buffer; its error is propagated.
 
-        let data = page_buffer
-            .data()
-            .split_at_checked(offset)
-            .map(|(_, data)| data);
+        let Some((_, data)) = page_buffer.data().split_at_checked(offset) else { // `offset` beyond the generated data: nothing to read.
+            return Ok(0);
+        };
 
-        match data {
-            None => Ok(0),
-            Some(data) => Ok(buffer.fill(data)?.allow_partial()),
-        }
+        Ok(buffer.fill(data)?.allow_partial()) // Copy the data from `offset` onward into the caller's buffer.
     }
 }
 
-define_struct_inode! {
-    pub struct DirInode {
-        entries: Locked<Vec<(Arc<[u8]>, ProcFsNode)>, ()>,
-    }
-}
+impl InodeDirOps for Node { // Directory operations: name lookup and entry enumeration.
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>> {
+        let NodeKind::Dir(dir) = &self.kind else { // Only directory nodes can be searched.
+            return Err(ENOTDIR);
+        };
 
-impl DirInode {
-    pub fn new(ino: Ino, vfs: Weak<ProcFs>) -> Arc<Self> {
-        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
-            addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem));
-            addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::DIR.perm(0o755)));
-            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
-            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
-        })
-    }
-}
+        let entries = dir.entries.read().await; // Read lock is held for the whole scan.
+
+        let dent_name = dentry.name();
+        for (name, node) in entries.iter() { // Linear scan; the first entry with a matching name wins.
+            if *name == ***dent_name {
+                return Ok(Some(node.clone() as _)); // Clone the handle and coerce it to the return type.
+            }
+        }
 
-impl Inode for DirInode {
-    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
-        let lock = block_on(self.rwsem.read());
-        Ok(self
-            .entries
-            .access(lock.prove())
-            .iter()
-            .find_map(|(name, node)| (name == &***dentry.name()).then(|| node.unwrap())))
+        Ok(None) // No entry with that name.
     }
 
-    fn do_readdir(
-        &self,
+    fn readdir<'r, 'a: 'r, 'b: 'r>( // The returned future borrows both `self` and `callback` for `'r`.
+        &'a self,
         offset: usize,
-        callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
-    ) -> KResult<usize> {
-        let lock = block_on(self.rwsem.read());
-        self.entries
-            .access(lock.prove())
-            .iter()
-            .skip(offset)
-            .map(|(name, node)| callback(name.as_ref(), node.ino()))
-            .take_while(|result| result.map_or(true, |flow| flow.is_continue()))
-            .take_while_inclusive(|result| result.is_ok())
-            .fold_ok(0, |acc, _| acc + 1)
+        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send), // Returns Ok(true) to continue, Ok(false) to stop.
+    ) -> impl Future<Output = KResult<KResult<usize>>> + Send + 'r { // Outer error: ENOTDIR here; inner error: the callback's.
+        Box::pin(async move {
+            let NodeKind::Dir(dir) = &self.kind else {
+                return Err(ENOTDIR);
+            };
+
+            let entries = dir.entries.read().await;
+
+            let mut count = 0; // Number of entries for which the callback returned Ok(true).
+            for (name, node) in entries.iter().skip(offset) { // `offset` is an index into the entry list.
+                match callback(name.as_ref(), node.ino) {
+                    Err(err) => return Ok(Err(err)), // Callback failure goes into the inner result.
+                    Ok(true) => count += 1,
+                    Ok(false) => break, // The entry that stopped the iteration is not counted.
+                }
+            }
+
+            Ok(Ok(count))
+        })
     }
 }
 
-impl_any!(ProcFs);
-pub struct ProcFs {
-    root_node: Arc<DirInode>,
-    next_ino: AtomicIno,
-}
-
-impl Vfs for ProcFs {
-    fn io_blksize(&self) -> usize {
-        4096
-    }
-
-    fn fs_devid(&self) -> DevId {
-        10
+impl Node { // Constructors for file and directory nodes.
+    pub fn new_file( // Builds a file node whose content is produced by `read`.
+        ino: Ino,
+        sb: SbRef<ProcFs>,
+        read: impl Fn(&mut PageBuffer) -> KResult<()> + Send + Sync + 'static,
+    ) -> InodeUse<Self> {
+        InodeUse::new(Self {
+            ino,
+            sb,
+            info: Spin::new(InodeInfo {
+                size: 0,
+                nlink: 1,
+                uid: 0,
+                gid: 0,
+                perm: Permission::new(0o444), // Read-only for everyone.
+                atime: Instant::UNIX_EPOCH, // All three timestamps start at the epoch constant.
+                ctime: Instant::UNIX_EPOCH,
+                mtime: Instant::UNIX_EPOCH,
+            }),
+            kind: NodeKind::File(FileInode::new(Box::new(read))), // The callback is boxed to erase its concrete type.
+        })
     }
 
-    fn is_read_only(&self) -> bool {
-        false
+    fn new_dir(ino: Ino, sb: SbRef<ProcFs>) -> InodeUse<Self> { // Builds an empty directory node.
+        InodeUse::new(Self {
+            ino,
+            sb,
+            info: Spin::new(InodeInfo {
+                size: 0,
+                nlink: 1,
+                uid: 0,
+                gid: 0,
+                perm: Permission::new(0o755), // rwxr-xr-x.
+                atime: Instant::UNIX_EPOCH,
+                ctime: Instant::UNIX_EPOCH,
+                mtime: Instant::UNIX_EPOCH,
+            }),
+            kind: NodeKind::Dir(DirInode::new()),
+        })
     }
 }
 
-static GLOBAL_PROCFS: LazyLock<Arc<ProcFs>> = LazyLock::new(|| {
-    Arc::new_cyclic(|weak: &Weak<ProcFs>| ProcFs {
-        root_node: DirInode::new(0, weak.clone()),
-        next_ino: AtomicIno::new(1),
-    })
-});
-
-struct ProcFsMountCreator;
-
-#[allow(dead_code)]
-impl ProcFsMountCreator {
-    pub fn get() -> Arc<ProcFs> {
-        GLOBAL_PROCFS.clone()
-    }
-
-    pub fn get_weak() -> Weak<ProcFs> {
-        Arc::downgrade(&GLOBAL_PROCFS)
+impl FileInode {
+    fn new(read: Box<dyn Fn(&mut PageBuffer) -> KResult<()> + Send + Sync>) -> Self { // Creates a readable file payload with no write handler.
+        Self {
+            read: Some(read),
+            write: None, // Writing is not wired up in this constructor.
+        }
     }
 }
 
-impl MountCreator for ProcFsMountCreator {
-    fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
-        let vfs = ProcFsMountCreator::get();
-        let root_inode = vfs.root_node.clone();
-        Mount::new(mp, vfs, root_inode)
-    }
-
-    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
-        Ok(true)
+impl DirInode {
+    pub fn new() -> Self { // Creates a directory payload with an empty entry list.
+        Self {
+            entries: RwLock::new(vec![]),
+        }
     }
 }
 
-pub fn root() -> ProcFsNode {
-    let vfs = ProcFsMountCreator::get();
-    let root = vfs.root_node.clone();
-
-    ProcFsNode::Dir(root)
+pub struct ProcFs { // procfs backend state stored in the global superblock.
+    root: InodeUse<Node>, // Root directory node handed out when mounting.
+    next_ino: AtomicU64, // Counter consumed by `assign_ino`.
 }
 
-pub fn creat(
-    parent: &ProcFsNode,
-    name: Arc<[u8]>,
-    file: Box<dyn ProcFsFile>,
-) -> KResult<ProcFsNode> {
-    let parent = match parent {
-        ProcFsNode::File(_) => return Err(ENOTDIR),
-        ProcFsNode::Dir(parent) => parent,
-    };
-
-    let fs = ProcFsMountCreator::get();
-    let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed);
-
-    let inode = FileInode::new(ino, Arc::downgrade(&fs), file);
-
-    {
-        let lock = block_on(parent.idata.rwsem.write());
-        parent
-            .entries
-            .access_mut(lock.prove_mut())
-            .push((name, ProcFsNode::File(inode.clone())));
+impl SuperBlock for ProcFs {} // Marker impl; no methods are overridden here.
+impl ProcFs {
+    fn assign_ino(&self) -> Ino { // Hands out the current counter value and increments it.
+        Ino::new(self.next_ino.fetch_add(1, Ordering::Relaxed)) // Relaxed ordering: only the counter itself is read-modify-written.
     }
-
-    Ok(ProcFsNode::File(inode))
 }
 
-#[allow(dead_code)]
-pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult<ProcFsNode> {
-    let parent = match parent {
-        ProcFsNode::File(_) => return Err(ENOTDIR),
-        ProcFsNode::Dir(parent) => parent,
-    };
-
-    let fs = ProcFsMountCreator::get();
-    let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed);
-
-    let inode = DirInode::new(ino, Arc::downgrade(&fs));
+static GLOBAL_PROCFS: LazyLock<SbUse<ProcFs>> = LazyLock::new(|| { // The single procfs superblock, built lazily on first access.
+    SbUse::new_cyclic( // The closure receives a superblock reference so the root node can point back at it.
+        SuperBlockInfo {
+            io_blksize: 4096,
+            device_id: DeviceId::new(0, 10),
+            read_only: false,
+        },
+        |sbref| ProcFs {
+            root: Node::new_dir(Ino::new(0), sbref), // The root directory uses inode number 0.
+            next_ino: AtomicU64::new(1), // Numbers handed out by `assign_ino` start at 1.
+        },
+    )
+});
 
-    parent
-        .entries
-        .access_mut(block_on(inode.rwsem.write()).prove_mut())
-        .push((Arc::from(name), ProcFsNode::Dir(inode.clone())));
+struct ProcFsMountCreator; // Stateless type registered as the mount creator for "procfs".
 
-    Ok(ProcFsNode::Dir(inode))
-}
+#[async_trait] // Needed because the trait method below is an `async fn`.
+impl MountCreator for ProcFsMountCreator {
+    async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> { // Source and flags are ignored.
+        let fs = GLOBAL_PROCFS.clone(); // Every mount uses the one global superblock.
+        let root_inode = fs.backend.root.clone();
 
-struct DumpMountsFile;
-impl ProcFsFile for DumpMountsFile {
-    fn can_read(&self) -> bool {
-        true
+        Mount::new(mp, fs, root_inode)
     }
 
-    fn read(&self, buffer: &mut PageBuffer) -> KResult<usize> {
-        dump_mounts(&mut buffer.get_writer());
-
-        Ok(buffer.data().len())
+    fn check_signature(&self, _: &[u8]) -> KResult<bool> { // Accepts any input; the bytes are not inspected.
+        Ok(true)
     }
 }
 
-pub fn init() {
-    register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap();
-
-    creat(
-        &root(),
-        Arc::from(b"mounts".as_slice()),
-        Box::new(DumpMountsFile),
-    )
-    .unwrap();
-}
-
-pub struct GenericProcFsFile<ReadFn>
+pub async fn populate_root<F>(name: Arc<[u8]>, read_fn: F) // Adds a read-only file named `name` to the procfs root, backed by `read_fn`.
 where
-    ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>,
+    F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static,
 {
-    read_fn: Option<ReadFn>,
-}
+    let procfs = &GLOBAL_PROCFS.backend;
+    let root = &procfs.root;
 
-impl<ReadFn> ProcFsFile for GenericProcFsFile<ReadFn>
-where
-    ReadFn: Send + Sync + Fn(&mut PageBuffer) -> KResult<()>,
-{
-    fn can_read(&self) -> bool {
-        self.read_fn.is_some()
-    }
+    let NodeKind::Dir(root) = &root.kind else { // The root is always created with `Node::new_dir`.
+        unreachable!();
+    };
 
-    fn read(&self, buffer: &mut PageBuffer) -> KResult<usize> {
-        self.read_fn.as_ref().ok_or(EACCES)?(buffer).map(|_| buffer.data().len())
-    }
+    let mut entries = root.entries.write().await; // Write lock is held until the end of the function.
+    entries.push(( // Appended without checking whether `name` already exists.
+        name.clone(), // NOTE(review): `name` is owned and not used afterwards, so this clone looks unnecessary.
+        Node::new_file(procfs.assign_ino(), SbRef::from(&GLOBAL_PROCFS), read_fn),
+    ));
 }
 
-pub fn populate_root<F>(name: Arc<[u8]>, read_fn: F) -> KResult<()>
-where
-    F: Send + Sync + Fn(&mut PageBuffer) -> KResult<()> + 'static,
-{
-    let root = root();
-
-    creat(
-        &root,
-        name,
-        Box::new(GenericProcFsFile {
-            read_fn: Some(read_fn),
-        }),
-    )
-    .map(|_| ())
+pub async fn init() { // Registers procfs and adds the `mounts` file to its root.
+    register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap(); // Panics if registration fails.
+
+    populate_root(Arc::from(b"mounts".as_slice()), |buffer| { // `mounts` content is produced by `dump_mounts` on each read.
+        dump_mounts(&mut buffer.get_writer());
+        Ok(())
+    })
+    .await;
 }

+ 0 - 146
src/fs/shm.rs

@@ -1,146 +0,0 @@
-use core::sync::atomic::{AtomicU32, Ordering};
-
-use alloc::{collections::btree_map::BTreeMap, sync::Arc};
-use bitflags::bitflags;
-use eonix_sync::{LazyLock, Mutex};
-
-use crate::{
-    fs::tmpfs::{DirectoryInode, FileInode, TmpFs},
-    kernel::{constants::ENOSPC, vfs::inode::Mode},
-    prelude::KResult,
-};
-
-bitflags! {
-    #[derive(Debug, Clone, Copy)]
-    pub struct ShmFlags: u32 {
-        /// Create a new segment. If this flag is not used, then shmget() will
-        /// find the segment associated with key and check to see if the user
-        /// has permission to access the segment.
-        const IPC_CREAT = 0o1000;
-        /// This flag is used with IPC_CREAT to ensure that this call creates
-        /// the segment.  If the segment already exists, the call fails.
-        const IPC_EXCL = 0o2000;
-
-        /// Attach the segment for read-only access.If this flag is not specified,
-        /// the segment is attached for read and write access, and the process
-        /// must have read and write permission for the segment.
-        const SHM_RDONLY = 0o10000;
-        /// round attach address to SHMLBA boundary
-        const SHM_RND = 0o20000;
-        /// Allow the contents of the segment to be executed.
-        const SHM_EXEC = 0o100000;
-    }
-}
-
-pub const IPC_PRIVATE: usize = 0;
-
-pub struct ShmManager {
-    tmpfs: Arc<TmpFs>,
-    root: Arc<DirectoryInode>,
-    areas: BTreeMap<u32, ShmArea>,
-}
-
-#[repr(C)]
-#[derive(Default, Clone, Copy, Debug)]
-pub struct IpcPerm {
-    key: i32,
-    uid: u32,
-    gid: u32,
-    cuid: u32,
-    cgid: u32,
-    mode: u16,
-    seq: u16,
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct ShmIdDs {
-    // Ownership and permissions
-    pub shm_perm: IpcPerm,
-    // Size of segment (bytes). In our system, this must be aligned
-    pub shm_segsz: usize,
-    // Last attach time
-    pub shm_atime: usize,
-    // Last detach time
-    pub shm_dtime: usize,
-    // Creation time/time of last modification via shmctl()
-    pub shm_ctime: usize,
-    // PID of creator
-    pub shm_cpid: usize,
-    // PID of last shmat(2)/shmdt(2)
-    pub shm_lpid: usize,
-    // No. of current attaches
-    pub shm_nattch: usize,
-}
-
-impl ShmIdDs {
-    fn new(size: usize, pid: u32) -> Self {
-        Self {
-            shm_perm: IpcPerm::default(),
-            shm_segsz: size,
-            shm_atime: 0,
-            shm_dtime: 0,
-            shm_ctime: 0, // Should set instant now
-            shm_cpid: pid as usize,
-            shm_lpid: 0,
-            shm_nattch: 0,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct ShmArea {
-    pub area: Arc<FileInode>,
-    pub shmid_ds: ShmIdDs,
-}
-
-// A big lock here to protect the shared memory area.
-// Can be improved with finer-grained locking?
-pub static SHM_MANAGER: LazyLock<Mutex<ShmManager>> =
-    LazyLock::new(|| Mutex::new(ShmManager::new()));
-
-impl ShmManager {
-    fn new() -> Self {
-        let (tmpfs, root) = TmpFs::create(false).expect("should create shm_area successfully");
-        Self {
-            tmpfs,
-            root,
-            areas: BTreeMap::new(),
-        }
-    }
-
-    pub fn create_shared_area(&self, size: usize, pid: u32, mode: Mode) -> ShmArea {
-        let ino = self.tmpfs.assign_ino();
-        let vfs = Arc::downgrade(&self.tmpfs);
-        ShmArea {
-            area: FileInode::new(ino, vfs, size, mode),
-            shmid_ds: ShmIdDs::new(size, pid),
-        }
-    }
-
-    pub fn get(&self, shmid: u32) -> Option<&ShmArea> {
-        self.areas.get(&shmid)
-    }
-
-    pub fn insert(&mut self, shmid: u32, area: ShmArea) {
-        self.areas.insert(shmid, area);
-    }
-}
-
-pub fn gen_shm_id(key: usize) -> KResult<u32> {
-    const SHM_MAGIC: u32 = 114514000;
-
-    static NEXT_SHMID: AtomicU32 = AtomicU32::new(0);
-
-    if key == IPC_PRIVATE {
-        let shmid = NEXT_SHMID.fetch_add(1, Ordering::Relaxed);
-
-        if shmid >= SHM_MAGIC {
-            return Err(ENOSPC);
-        } else {
-            return Ok(shmid);
-        }
-    }
-
-    (key as u32).checked_add(SHM_MAGIC).ok_or(ENOSPC)
-}

+ 0 - 613
src/fs/tmpfs.rs

@@ -1,613 +0,0 @@
-use crate::io::Stream;
-use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR};
-use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend};
-use crate::kernel::task::block_on;
-use crate::kernel::timer::Instant;
-use crate::kernel::vfs::inode::RenameData;
-use crate::kernel::vfs::inode::{AtomicMode, InodeData};
-use crate::{
-    io::Buffer,
-    kernel::vfs::{
-        dentry::{dcache, Dentry},
-        inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode, WriteOffset},
-        mount::{register_filesystem, Mount, MountCreator, MS_RDONLY},
-        vfs::Vfs,
-        DevId,
-    },
-    prelude::*,
-};
-use alloc::sync::{Arc, Weak};
-use core::fmt::Debug;
-use core::{ops::ControlFlow, sync::atomic::Ordering};
-use eonix_mm::paging::PAGE_SIZE;
-use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut};
-use itertools::Itertools;
-
-fn acquire(vfs: &Weak<dyn Vfs>) -> KResult<Arc<dyn Vfs>> {
-    vfs.upgrade().ok_or(EIO)
-}
-
-fn astmp(vfs: &Arc<dyn Vfs>) -> &TmpFs {
-    vfs.as_any()
-        .downcast_ref::<TmpFs>()
-        .expect("corrupted tmpfs data structure")
-}
-
-define_struct_inode! {
-    struct NodeInode {
-        devid: DevId,
-    }
-}
-
-impl NodeInode {
-    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode, devid: DevId) -> Arc<Self> {
-        Self::new_locked(ino, vfs, |inode, _| unsafe {
-            addr_of_mut_field!(inode, devid).write(devid);
-
-            addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(mode));
-            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
-            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
-        })
-    }
-}
-
-impl Inode for NodeInode {
-    fn devid(&self) -> KResult<DevId> {
-        Ok(self.devid)
-    }
-}
-
-define_struct_inode! {
-    pub(super) struct DirectoryInode {
-        entries: Locked<Vec<(Arc<[u8]>, Ino)>, ()>,
-    }
-}
-
-impl DirectoryInode {
-    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
-        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
-            addr_of_mut_field!(inode, entries)
-                .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem));
-
-            addr_of_mut_field!(&mut *inode, size).write(1.into());
-            addr_of_mut_field!(&mut *inode, mode)
-                .write(AtomicMode::from(Mode::DIR.perm(mode.non_format_bits())));
-            addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself
-            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
-        })
-    }
-
-    fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: ProofMut<'_, ()>) {
-        let now = Instant::now();
-
-        // SAFETY: Only `unlink` will do something based on `nlink` count
-        //         No need to synchronize here
-        file.nlink.fetch_add(1, Ordering::Relaxed);
-        *self.ctime.lock() = now;
-
-        // SAFETY: `rwsem` has done the synchronization
-        self.size.fetch_add(1, Ordering::Relaxed);
-        *self.mtime.lock() = now;
-
-        self.entries.access_mut(dlock).push((name, file.ino));
-    }
-
-    fn do_unlink(
-        &self,
-        file: &Arc<dyn Inode>,
-        filename: &[u8],
-        entries: &mut Vec<(Arc<[u8]>, Ino)>,
-        now: Instant,
-        decrease_size: bool,
-        _dir_lock: ProofMut<()>,
-        _file_lock: ProofMut<()>,
-    ) -> KResult<()> {
-        // SAFETY: `file_lock` has done the synchronization
-        if file.mode.load().is_dir() {
-            return Err(EISDIR);
-        }
-
-        entries.retain(|(name, ino)| *ino != file.ino || name.as_ref() != filename);
-
-        if decrease_size {
-            // SAFETY: `dir_lock` has done the synchronization
-            self.size.fetch_sub(1, Ordering::Relaxed);
-        }
-
-        *self.mtime.lock() = now;
-
-        // The last reference to the inode is held by some dentry
-        // and will be released when the dentry is released
-
-        // SAFETY: `file_lock` has done the synchronization
-        file.nlink.fetch_sub(1, Ordering::Relaxed);
-        *file.ctime.lock() = now;
-
-        Ok(())
-    }
-}
-
-impl Inode for DirectoryInode {
-    fn do_readdir(
-        &self,
-        offset: usize,
-        callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
-    ) -> KResult<usize> {
-        let lock = block_on(self.rwsem.read());
-        self.entries
-            .access(lock.prove())
-            .iter()
-            .skip(offset)
-            .map(|(name, ino)| callback(&name, *ino))
-            .take_while(|result| result.map_or(true, |flow| flow.is_continue()))
-            .take_while_inclusive(|result| result.is_ok())
-            .fold_ok(0, |acc, _| acc + 1)
-    }
-
-    fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        let vfs = acquire(&self.vfs)?;
-        let vfs = astmp(&vfs);
-
-        let rwsem = block_on(self.rwsem.write());
-
-        let ino = vfs.assign_ino();
-        let file = FileInode::new(ino, self.vfs.clone(), 0, mode);
-
-        self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
-        at.save_reg(file)
-    }
-
-    fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> {
-        if !mode.is_chr() && !mode.is_blk() {
-            return Err(EINVAL);
-        }
-
-        let vfs = acquire(&self.vfs)?;
-        let vfs = astmp(&vfs);
-
-        let rwsem = block_on(self.rwsem.write());
-
-        let ino = vfs.assign_ino();
-        let file = NodeInode::new(ino, self.vfs.clone(), mode, dev);
-
-        self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
-        at.save_reg(file)
-    }
-
-    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
-        let vfs = acquire(&self.vfs)?;
-        let vfs = astmp(&vfs);
-
-        let rwsem = block_on(self.rwsem.write());
-
-        let ino = vfs.assign_ino();
-        let file = SymlinkInode::new(ino, self.vfs.clone(), target.into());
-
-        self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
-        at.save_symlink(file)
-    }
-
-    fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> {
-        let vfs = acquire(&self.vfs)?;
-        let vfs = astmp(&vfs);
-
-        let rwsem = block_on(self.rwsem.write());
-
-        let ino = vfs.assign_ino();
-        let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode);
-
-        self.link(at.get_name(), newdir.as_ref(), rwsem.prove_mut());
-        at.save_dir(newdir)
-    }
-
-    fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
-        let _vfs = acquire(&self.vfs)?;
-
-        let dir_lock = block_on(self.rwsem.write());
-
-        let file = at.get_inode()?;
-        let filename = at.get_name();
-        let file_lock = block_on(file.rwsem.write());
-
-        let entries = self.entries.access_mut(dir_lock.prove_mut());
-
-        self.do_unlink(
-            &file,
-            &filename,
-            entries,
-            Instant::now(),
-            true,
-            dir_lock.prove_mut(),
-            file_lock.prove_mut(),
-        )?;
-
-        // Remove the dentry from the dentry cache immediately
-        // so later lookup will fail with ENOENT
-        dcache::d_remove(at);
-
-        Ok(())
-    }
-
-    fn chmod(&self, mode: Mode) -> KResult<()> {
-        let _vfs = acquire(&self.vfs)?;
-        let _lock = block_on(self.rwsem.write());
-
-        // SAFETY: `rwsem` has done the synchronization
-        let old = self.mode.load();
-        self.mode.store(old.perm(mode.non_format_bits()));
-        *self.ctime.lock() = Instant::now();
-
-        Ok(())
-    }
-
-    fn rename(&self, rename_data: RenameData) -> KResult<()> {
-        let RenameData {
-            old_dentry,
-            new_dentry,
-            new_parent,
-            is_exchange,
-            no_replace,
-            vfs,
-        } = rename_data;
-
-        if is_exchange {
-            println_warn!("TmpFs does not support exchange rename for now");
-            return Err(ENOSYS);
-        }
-
-        let vfs = vfs
-            .as_any()
-            .downcast_ref::<TmpFs>()
-            .expect("vfs must be a TmpFs");
-
-        let _rename_lock = block_on(vfs.rename_lock.lock());
-
-        let old_file = old_dentry.get_inode()?;
-        let new_file = new_dentry.get_inode();
-
-        if no_replace && new_file.is_ok() {
-            return Err(EEXIST);
-        }
-
-        let same_parent = Arc::as_ptr(&new_parent) == &raw const *self;
-        if same_parent {
-            // Same directory rename
-            // Remove from old location and add to new location
-            let parent_lock = block_on(self.rwsem.write());
-            let entries = self.entries.access_mut(parent_lock.prove_mut());
-
-            fn rename_old(
-                old_entry: &mut (Arc<[u8]>, Ino),
-                old_file: &Arc<dyn Inode + 'static>,
-                new_dentry: &Arc<Dentry>,
-                now: Instant,
-            ) {
-                let (name, _) = old_entry;
-                *name = new_dentry.get_name();
-                *old_file.ctime.lock() = now;
-            }
-
-            let old_ino = old_file.ino;
-            let new_ino = new_file.as_ref().ok().map(|f| f.ino);
-            let old_name = old_dentry.get_name();
-            let new_name = new_dentry.get_name();
-
-            // Find the old and new entries in the directory after we've locked the directory.
-            let indices =
-                entries
-                    .iter()
-                    .enumerate()
-                    .fold([None, None], |[old, new], (idx, (name, ino))| {
-                        if Some(*ino) == new_ino && *name == new_name {
-                            [old, Some(idx)]
-                        } else if *ino == old_ino && *name == old_name {
-                            [Some(idx), new]
-                        } else {
-                            [old, new]
-                        }
-                    });
-
-            let (old_entry_idx, new_entry_idx) = match indices {
-                [None, ..] => return Err(ENOENT),
-                [Some(old_idx), new_idx] => (old_idx, new_idx),
-            };
-
-            let now = Instant::now();
-
-            if let Some(new_idx) = new_entry_idx {
-                // Replace existing file (i.e. rename the old and unlink the new)
-                let new_file = new_file.unwrap();
-                let _new_file_lock = block_on(new_file.rwsem.write());
-
-                // SAFETY: `new_file_lock` has done the synchronization
-                match (new_file.mode.load(), old_file.mode.load()) {
-                    (Mode::DIR, _) => return Err(EISDIR),
-                    (_, Mode::DIR) => return Err(ENOTDIR),
-                    _ => {}
-                }
-
-                entries.remove(new_idx);
-
-                // SAFETY: `parent_lock` has done the synchronization
-                self.size.fetch_sub(1, Ordering::Relaxed);
-
-                // The last reference to the inode is held by some dentry
-                // and will be released when the dentry is released
-
-                // SAFETY: `new_file_lock` has done the synchronization
-                new_file.nlink.fetch_sub(1, Ordering::Relaxed);
-                *new_file.ctime.lock() = now;
-            }
-
-            rename_old(&mut entries[old_entry_idx], &old_file, new_dentry, now);
-            *self.mtime.lock() = now;
-        } else {
-            // Cross-directory rename - handle similar to same directory case
-
-            // Get new parent directory
-            let new_parent_inode = new_dentry.parent().get_inode()?;
-            assert!(new_parent_inode.is_dir());
-            let new_parent = (new_parent_inode.as_ref() as &dyn Any)
-                .downcast_ref::<DirectoryInode>()
-                .expect("new parent must be a DirectoryInode");
-
-            let old_parent_lock = block_on(self.rwsem.write());
-            let new_parent_lock = block_on(new_parent_inode.rwsem.write());
-
-            let old_ino = old_file.ino;
-            let new_ino = new_file.as_ref().ok().map(|f| f.ino);
-            let old_name = old_dentry.get_name();
-            let new_name = new_dentry.get_name();
-
-            // Find the old entry in the old directory
-            let old_entries = self.entries.access_mut(old_parent_lock.prove_mut());
-            let old_pos = old_entries
-                .iter()
-                .position(|(name, ino)| *ino == old_ino && *name == old_name)
-                .ok_or(ENOENT)?;
-
-            // Find the new entry in the new directory (if it exists)
-            let new_entries = new_parent.entries.access_mut(new_parent_lock.prove_mut());
-            let has_new = new_entries
-                .iter()
-                .position(|(name, ino)| Some(*ino) == new_ino && *name == new_name)
-                .is_some();
-
-            let now = Instant::now();
-
-            if has_new {
-                // Replace existing file (i.e. move the old and unlink the new)
-                let new_file = new_file.unwrap();
-                let new_file_lock = block_on(new_file.rwsem.write());
-
-                match (old_file.mode.load(), new_file.mode.load()) {
-                    (Mode::DIR, Mode::DIR) => {}
-                    (Mode::DIR, _) => return Err(ENOTDIR),
-                    (_, _) => {}
-                }
-
-                // Unlink the old file that was replaced
-                new_parent.do_unlink(
-                    &new_file,
-                    &new_name,
-                    new_entries,
-                    now,
-                    false,
-                    new_parent_lock.prove_mut(),
-                    new_file_lock.prove_mut(),
-                )?;
-            } else {
-                new_parent.size.fetch_add(1, Ordering::Relaxed);
-            }
-
-            // Remove from old directory
-            old_entries.remove(old_pos);
-
-            // Add new entry
-            new_entries.push((new_name, old_ino));
-
-            self.size.fetch_sub(1, Ordering::Relaxed);
-            *self.mtime.lock() = now;
-            *old_file.ctime.lock() = now;
-        }
-
-        block_on(dcache::d_exchange(old_dentry, new_dentry));
-
-        Ok(())
-    }
-}
-
-define_struct_inode! {
-    struct SymlinkInode {
-        target: Arc<[u8]>,
-    }
-}
-
-impl SymlinkInode {
-    fn new(ino: Ino, vfs: Weak<dyn Vfs>, target: Arc<[u8]>) -> Arc<Self> {
-        Self::new_locked(ino, vfs, |inode, _| unsafe {
-            let len = target.len();
-            addr_of_mut_field!(inode, target).write(target);
-
-            addr_of_mut_field!(&mut *inode, mode).write(AtomicMode::from(Mode::LNK.perm(0o777)));
-            addr_of_mut_field!(&mut *inode, size).write((len as u64).into());
-            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
-        })
-    }
-}
-
-impl Inode for SymlinkInode {
-    fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
-        buffer
-            .fill(self.target.as_ref())
-            .map(|result| result.allow_partial())
-    }
-
-    fn chmod(&self, _: Mode) -> KResult<()> {
-        Ok(())
-    }
-}
-
-define_struct_inode! {
-    pub struct FileInode {
-        pages: PageCache,
-    }
-}
-
-impl Debug for FileInode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "FileInode({:?})", self.idata)
-    }
-}
-
-impl FileInode {
-    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>, size: usize, mode: Mode) -> Arc<Self> {
-        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| FileInode {
-            idata: InodeData::new(ino, vfs),
-            pages: PageCache::new(weak_self.clone()),
-        });
-
-        inode.mode.store(Mode::REG.perm(mode.non_format_bits()));
-        inode.nlink.store(1, Ordering::Relaxed);
-        inode.size.store(size as u64, Ordering::Relaxed);
-        inode
-    }
-}
-
-impl PageCacheBackend for FileInode {
-    fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult<usize> {
-        Ok(PAGE_SIZE)
-    }
-
-    fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
-        Ok(PAGE_SIZE)
-    }
-
-    fn size(&self) -> usize {
-        self.size.load(Ordering::Relaxed) as usize
-    }
-}
-
-impl Inode for FileInode {
-    fn page_cache(&self) -> Option<&PageCache> {
-        Some(&self.pages)
-    }
-
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        let _lock = block_on(self.rwsem.write());
-        block_on(self.pages.read(buffer, offset))
-    }
-
-    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
-        // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let _lock = block_on(self.rwsem.write());
-
-        let mut store_new_end = None;
-        let offset = match offset {
-            WriteOffset::Position(offset) => offset,
-            WriteOffset::End(end) => {
-                store_new_end = Some(end);
-
-                // SAFETY: `lock` has done the synchronization
-                self.size.load(Ordering::Relaxed) as usize
-            }
-        };
-
-        let wrote = block_on(self.pages.write(stream, offset))?;
-        let cursor_end = offset + wrote;
-
-        if let Some(store_end) = store_new_end {
-            *store_end = cursor_end;
-        }
-
-        // SAFETY: `lock` has done the synchronization
-        *self.mtime.lock() = Instant::now();
-        self.size.store(cursor_end as u64, Ordering::Relaxed);
-
-        Ok(wrote)
-    }
-
-    fn truncate(&self, length: usize) -> KResult<()> {
-        let _lock = block_on(self.rwsem.write());
-        block_on(self.pages.resize(length))?;
-        self.size.store(length as u64, Ordering::Relaxed);
-        *self.mtime.lock() = Instant::now();
-        Ok(())
-    }
-
-    fn chmod(&self, mode: Mode) -> KResult<()> {
-        let _vfs = acquire(&self.vfs)?;
-        let _lock = block_on(self.rwsem.write());
-
-        // SAFETY: `rwsem` has done the synchronization
-        let old = self.mode.load();
-        self.mode.store(old.perm(mode.non_format_bits()));
-        *self.ctime.lock() = Instant::now();
-
-        Ok(())
-    }
-}
-
-impl_any!(TmpFs);
-pub(super) struct TmpFs {
-    next_ino: AtomicIno,
-    readonly: bool,
-    rename_lock: Mutex<()>,
-}
-
-impl Vfs for TmpFs {
-    fn io_blksize(&self) -> usize {
-        4096
-    }
-
-    fn fs_devid(&self) -> DevId {
-        2
-    }
-
-    fn is_read_only(&self) -> bool {
-        self.readonly
-    }
-}
-
-impl TmpFs {
-    pub(super) fn assign_ino(&self) -> Ino {
-        self.next_ino.fetch_add(1, Ordering::AcqRel)
-    }
-
-    pub fn create(readonly: bool) -> KResult<(Arc<TmpFs>, Arc<DirectoryInode>)> {
-        let tmpfs = Arc::new(Self {
-            next_ino: AtomicIno::new(1),
-            readonly,
-            rename_lock: Mutex::new(()),
-        });
-
-        let weak = Arc::downgrade(&tmpfs);
-        let root_dir = DirectoryInode::new(0, weak, Mode::new(0o755));
-
-        Ok((tmpfs, root_dir))
-    }
-}
-
-struct TmpFsMountCreator;
-
-impl MountCreator for TmpFsMountCreator {
-    fn create_mount(&self, _source: &str, flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
-        let (fs, root_inode) = TmpFs::create(flags & MS_RDONLY != 0)?;
-
-        Mount::new(mp, fs, root_inode)
-    }
-
-    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
-        Ok(true)
-    }
-}
-
-pub fn init() {
-    register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap();
-}

+ 415 - 0
src/fs/tmpfs/dir.rs

@@ -0,0 +1,415 @@
+use core::{any::Any, future::Future};
+
+use alloc::{boxed::Box, sync::Arc, vec, vec::Vec};
+use eonix_log::println_warn;
+use eonix_sync::{LazyLock, RwLock, Spin};
+
+use crate::{
+    kernel::{
+        constants::{EEXIST, EINVAL, EISDIR, ENOENT, ENOSYS, ENOTDIR},
+        mem::PageCache,
+        timer::Instant,
+        vfs::{
+            dentry::{dcache, Dentry},
+            inode::{
+                Ino, Inode, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, RenameData,
+            },
+            types::{DeviceId, Format, Mode, Permission},
+            SbRef,
+        },
+    },
+    prelude::KResult,
+};
+
+use super::{
+    file::{DeviceInode, FileInode, SymlinkInode},
+    TmpFs,
+};
+
+/// A tmpfs directory inode.
+///
+/// The directory contents live entirely in memory as a list of
+/// `(name, inode number)` pairs.
+pub struct DirectoryInode {
+    /// Reference to the owning [`TmpFs`] superblock.
+    sb: SbRef<TmpFs>,
+    /// Inode number of this directory.
+    ino: Ino,
+    /// Generic inode attributes (size, link count, permission, timestamps,
+    /// owner ids), guarded by a spinlock.
+    info: Spin<InodeInfo>,
+    /// Directory entries as `(name, ino)` pairs.
+    entries: RwLock<Vec<(Arc<[u8]>, Ino)>>,
+}
+
+/// Generic inode attribute accessors for a tmpfs directory.
+impl InodeOps for DirectoryInode {
+    type SuperBlock = TmpFs;
+
+    /// Returns the inode number of this directory.
+    fn ino(&self) -> Ino {
+        self.ino
+    }
+
+    /// Always reports [`Format::DIR`].
+    fn format(&self) -> Format {
+        Format::DIR
+    }
+
+    /// Returns the spinlock-protected inode attributes.
+    fn info(&self) -> &Spin<InodeInfo> {
+        &self.info
+    }
+
+    /// Returns the reference to the owning superblock.
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
+    }
+
+    /// Directories have no page cache.
+    fn page_cache(&self) -> Option<&PageCache> {
+        None
+    }
+}
+
+impl DirectoryInode {
+    /// Creates a directory inode numbered `ino` on superblock `sb` with
+    /// permission bits `perm`.
+    ///
+    /// The new directory starts with a single `.` entry that points back at
+    /// itself, so both its size and its link count start at 1.
+    pub fn new(ino: Ino, sb: SbRef<TmpFs>, perm: Permission) -> InodeUse<Self> {
+        // The `.` name is identical for every directory: allocate it once.
+        static DOT: LazyLock<Arc<[u8]>> = LazyLock::new(|| Arc::from(b".".as_slice()));
+
+        let created_at = Instant::now();
+        let attributes = InodeInfo {
+            size: 1,
+            nlink: 1, // link from `.` to itself
+            perm,
+            ctime: created_at,
+            mtime: created_at,
+            atime: created_at,
+            uid: 0,
+            gid: 0,
+        };
+
+        InodeUse::new(Self {
+            sb,
+            ino,
+            info: Spin::new(attributes),
+            entries: RwLock::new(vec![(DOT.clone(), ino)]),
+        })
+    }
+
+    /// Appends a `(name, file.ino())` entry to `entries` and updates the
+    /// bookkeeping of both inodes.
+    ///
+    /// `entries` is expected to be this directory's own (write-locked) entry
+    /// list. The directory's `size` grows by one and its `mtime`/`ctime` are
+    /// refreshed; the file's `nlink` grows by one and its `ctime` is
+    /// refreshed.
+    fn link(
+        &self,
+        entries: &mut Vec<(Arc<[u8]>, Ino)>,
+        name: Arc<[u8]>,
+        file: &InodeUse<dyn Inode>,
+    ) {
+        // Lock order: the directory's info first, then the file's info.
+        let mut dir_info = self.info.lock();
+        let mut target_info = file.info().lock();
+
+        let stamp = Instant::now();
+
+        dir_info.size += 1;
+        dir_info.mtime = stamp;
+        dir_info.ctime = stamp;
+
+        target_info.nlink += 1;
+        target_info.ctime = stamp;
+
+        entries.push((name, file.ino()));
+    }
+
+    /// Removes the entry `(filename, file.ino())` from `entries` and updates
+    /// the bookkeeping of both inodes.
+    ///
+    /// The caller passes in the already-locked attribute blocks of this
+    /// directory (`self_info`) and of the file (`file_info`). When
+    /// `decrease_size` is `false` the directory size is left alone, which
+    /// lets `rename` replace an entry without changing the entry count.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EISDIR` if `file` is a directory; nothing is modified in
+    /// that case.
+    fn do_unlink(
+        &self,
+        file: &InodeUse<dyn Inode>,
+        filename: &[u8],
+        entries: &mut Vec<(Arc<[u8]>, Ino)>,
+        now: Instant,
+        decrease_size: bool,
+        self_info: &mut InodeInfo,
+        file_info: &mut InodeInfo,
+    ) -> KResult<()> {
+        // Directories cannot be removed through this path.
+        if file.format() == Format::DIR {
+            return Err(EISDIR);
+        }
+
+        // Drop every entry that matches both the inode number and the name.
+        let target_ino = file.ino();
+        entries.retain(|(name, ino)| !(*ino == target_ino && name.as_ref() == filename));
+
+        if decrease_size {
+            self_info.size -= 1;
+        }
+        self_info.mtime = now;
+        self_info.ctime = now;
+
+        // The last reference to the inode is held by some dentry
+        // and will be released when the dentry is released.
+        file_info.nlink -= 1;
+        file_info.ctime = now;
+
+        // TODO!!!: Remove the file if nlink == 1
+
+        Ok(())
+    }
+}
+
+impl InodeDirOps for DirectoryInode {
+    /// Feeds the directory entries, starting at index `offset`, to
+    /// `for_each_entry`.
+    ///
+    /// The callback returns `Ok(true)` to keep going, `Ok(false)` to stop, or
+    /// an error, which is handed back in the inner `KResult`. On success the
+    /// inner value is the number of entries for which the callback returned
+    /// `Ok(true)`. The outer `KResult` only fails if the superblock can no
+    /// longer be acquired.
+    fn readdir<'r, 'a: 'r, 'b: 'r>(
+        &'a self,
+        offset: usize,
+        for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> impl Future<Output = KResult<KResult<usize>>> + Send + 'r {
+        Box::pin(async move {
+            let _sb = self.sb.get()?;
+            let entries = self.entries.read().await;
+
+            let mut accepted = 0;
+            for (name, ino) in entries.iter().skip(offset) {
+                let keep_going = match for_each_entry(name, *ino) {
+                    Ok(keep_going) => keep_going,
+                    Err(err) => return Ok(Err(err)),
+                };
+
+                if !keep_going {
+                    break;
+                }
+
+                accepted += 1;
+            }
+
+            Ok(Ok(accepted))
+        })
+    }
+
+    /// Creates an empty regular file with permission `perm`, links it into
+    /// this directory under the name of `at`, and attaches it to `at`.
+    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()> {
+        let sb = self.sb.get()?;
+        let mut entries = self.entries.write().await;
+
+        // A fresh inode number is taken from the superblock for each new file.
+        let inode: InodeUse<dyn Inode> =
+            FileInode::new(sb.backend.assign_ino(), self.sb.clone(), 0, perm);
+
+        self.link(&mut entries, at.get_name(), &inode);
+        at.fill(inode);
+
+        Ok(())
+    }
+
+    /// Creates a character or block device node for device `dev`, links it
+    /// into this directory under the name of `at`, and attaches it to `at`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` when `mode` is neither a character nor a block
+    /// device mode.
+    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
+        let is_device = mode.is_chr() || mode.is_blk();
+        if !is_device {
+            return Err(EINVAL);
+        }
+
+        let sb = self.sb.get()?;
+        let mut entries = self.entries.write().await;
+
+        let node: InodeUse<dyn Inode> =
+            DeviceInode::new(sb.backend.assign_ino(), self.sb.clone(), mode, dev);
+
+        self.link(&mut entries, at.get_name(), &node);
+        at.fill(node);
+
+        Ok(())
+    }
+
+    /// Creates a symbolic link whose content is `target`, links it into this
+    /// directory under the name of `at`, and attaches it to `at`.
+    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        let sb = self.sb.get()?;
+        let mut entries = self.entries.write().await;
+
+        // The target bytes are copied into the new inode.
+        let link_inode: InodeUse<dyn Inode> =
+            SymlinkInode::new(sb.backend.assign_ino(), self.sb.clone(), target.into());
+
+        self.link(&mut entries, at.get_name(), &link_inode);
+        at.fill(link_inode);
+
+        Ok(())
+    }
+
+    /// Creates a subdirectory with permission `perm`, links it into this
+    /// directory under the name of `at`, and attaches it to `at`.
+    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> {
+        let sb = self.sb.get()?;
+        let mut entries = self.entries.write().await;
+
+        let child: InodeUse<dyn Inode> =
+            DirectoryInode::new(sb.backend.assign_ino(), self.sb.clone(), perm);
+
+        self.link(&mut entries, at.get_name(), &child);
+        at.fill(child);
+
+        Ok(())
+    }
+
+    /// Removes the entry named by `at` from this directory and drops `at`
+    /// from the dentry cache.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the superblock or the inode of `at` cannot be obtained, or
+    /// with `EISDIR` when `at` refers to a directory.
+    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        let _sb = self.sb.get()?;
+        let mut entries = self.entries.write().await;
+
+        let target = at.get_inode()?;
+        let target_name = at.get_name();
+
+        {
+            // Lock order: the directory's info first, then the file's info.
+            let mut dir_info = self.info.lock();
+            let mut target_info = target.info().lock();
+
+            self.do_unlink(
+                &target,
+                &target_name,
+                &mut entries,
+                Instant::now(),
+                true,
+                &mut dir_info,
+                &mut target_info,
+            )?;
+        }
+
+        // Remove the dentry from the dentry cache immediately
+        // so later lookup will fail with ENOENT
+        dcache::d_remove(at);
+
+        Ok(())
+    }
+
+    /// Renames `old_dentry` (an entry of this directory) to `new_dentry`,
+    /// which lives either in this directory or in `new_parent`.
+    ///
+    /// An existing non-directory target is replaced (its link count drops by
+    /// one) unless `no_replace` is set. Exchange renames are not supported.
+    /// On success the two dentries are swapped in the dentry cache.
+    ///
+    /// # Errors
+    ///
+    /// * `ENOSYS`  - `is_exchange` was requested.
+    /// * `EEXIST`  - `no_replace` was requested and the target exists.
+    /// * `ENOENT`  - the source entry is no longer present in this directory.
+    /// * `EISDIR`  - the entry to be replaced is a directory.
+    /// * `ENOTDIR` - a directory would replace a non-directory.
+    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
+        let sb = self.sb.get()?;
+        // Only one rename runs per filesystem at a time; presumably this is
+        // what makes taking two directories' entry locks below safe.
+        let _rename_lock = sb.backend.rename_lock.lock().await;
+        let mut self_entries = self.entries.write().await;
+
+        let RenameData {
+            old_dentry,
+            new_dentry,
+            new_parent,
+            is_exchange,
+            no_replace,
+        } = rename_data;
+
+        if is_exchange {
+            println_warn!("TmpFs does not support exchange rename for now");
+            return Err(ENOSYS);
+        }
+
+        let old_file = old_dentry.get_inode()?;
+        let new_file = new_dentry.inode();
+
+        if no_replace && new_file.is_some() {
+            return Err(EEXIST);
+        }
+
+        if new_parent.as_raw() == &raw const *self {
+            // Same directory rename
+            // Remove from old location and add to new location
+            let old_ino = old_file.ino();
+            let new_ino = new_file.as_ref().map(|f| f.ino());
+            let old_name = old_dentry.get_name();
+            let new_name = new_dentry.get_name();
+
+            // Find the old and new entries in the directory after we've locked the directory.
+            let (mut old_ent_idx, mut new_ent_idx) = (None, None);
+            for (idx, (name, ino)) in self_entries.iter().enumerate() {
+                if *ino == old_ino && *name == old_name {
+                    old_ent_idx = Some(idx);
+                }
+
+                if Some(*ino) == new_ino && *name == new_name {
+                    new_ent_idx = Some(idx);
+                }
+            }
+
+            let Some(mut old_ent_idx) = old_ent_idx else {
+                return Err(ENOENT);
+            };
+
+            // Source and target are the very same entry: nothing to do.
+            if Some(old_ent_idx) == new_ent_idx {
+                return Ok(());
+            }
+
+            let now = Instant::now();
+            if let Some(new_idx) = new_ent_idx {
+                // Replace existing file (i.e. rename the old and unlink the new)
+                // `new_ent_idx` can only be `Some` when `new_file` is `Some`.
+                let new_file = new_file.unwrap();
+
+                match (new_file.format(), old_file.format()) {
+                    (Format::DIR, _) => return Err(EISDIR),
+                    (_, Format::DIR) => return Err(ENOTDIR),
+                    _ => {}
+                }
+
+                self_entries.remove(new_idx);
+
+                // `Vec::remove` shifts every later element one slot to the
+                // left, so the source entry moves if it came after the
+                // removed one. Without this adjustment we would rename the
+                // wrong entry or index past the end of the list.
+                if new_idx < old_ent_idx {
+                    old_ent_idx -= 1;
+                }
+
+                self.info.lock().size -= 1;
+
+                // The last reference to the inode is held by some dentry
+                // and will be released when the dentry is released
+
+                let mut new_info = new_file.info().lock();
+
+                new_info.nlink -= 1;
+                new_info.mtime = now;
+                new_info.ctime = now;
+            }
+
+            let (name, _) = &mut self_entries[old_ent_idx];
+            *name = new_name;
+
+            let mut self_info = self.info.lock();
+            self_info.mtime = now;
+            self_info.ctime = now;
+        } else {
+            // Cross-directory rename - handle similar to same directory case
+
+            // Get new parent directory
+            let new_parent_inode = new_dentry.parent().get_inode()?;
+            assert_eq!(new_parent_inode.format(), Format::DIR);
+
+            // NOTE(review): this casts the `InodeUse` handle itself to
+            // `&dyn Any`, not the inode it refers to; confirm the downcast to
+            // `DirectoryInode` can actually succeed, otherwise every
+            // cross-directory rename hits the `expect` below.
+            let new_parent = (&new_parent_inode as &dyn Any)
+                .downcast_ref::<DirectoryInode>()
+                .expect("new parent must be a DirectoryInode");
+
+            let mut new_entries = new_parent.entries.write().await;
+
+            let old_ino = old_file.ino();
+            let new_ino = new_file.as_ref().map(|f| f.ino());
+            let old_name = old_dentry.get_name();
+            let new_name = new_dentry.get_name();
+
+            // Find the old entry in the old directory
+            let old_pos = self_entries
+                .iter()
+                .position(|(name, ino)| *ino == old_ino && *name == old_name)
+                .ok_or(ENOENT)?;
+
+            // Check whether the target entry already exists in the new directory
+            let has_new = new_entries
+                .iter()
+                .any(|(name, ino)| Some(*ino) == new_ino && *name == new_name);
+
+            let now = Instant::now();
+
+            if has_new {
+                // Replace existing file (i.e. move the old and unlink the new)
+                // `has_new` can only be true when `new_file` is `Some`.
+                let new_file = new_file.unwrap();
+
+                match (old_file.format(), new_file.format()) {
+                    (Format::DIR, Format::DIR) => {}
+                    (Format::DIR, _) => return Err(ENOTDIR),
+                    (_, _) => {}
+                }
+
+                // Unlink the file that is being replaced. The entry count of
+                // the new parent stays the same because the moved entry takes
+                // its place, hence `decrease_size == false`.
+                new_parent.do_unlink(
+                    &new_file,
+                    &new_name,
+                    &mut new_entries,
+                    now,
+                    false,
+                    &mut new_parent.info.lock(),
+                    &mut new_file.info().lock(),
+                )?;
+            } else {
+                // Take the lock once so the three fields change together.
+                let mut new_parent_info = new_parent.info.lock();
+                new_parent_info.size += 1;
+                new_parent_info.mtime = now;
+                new_parent_info.ctime = now;
+            }
+
+            // Remove from old directory
+            self_entries.remove(old_pos);
+
+            // Add new entry
+            new_entries.push((new_name, old_ino));
+
+            let mut self_info = self.info.lock();
+            self_info.size -= 1;
+            self_info.mtime = now;
+            self_info.ctime = now;
+        }
+
+        dcache::d_exchange(old_dentry, new_dentry).await;
+        Ok(())
+    }
+}
+
+/// File-style operations supported on a tmpfs directory.
+impl InodeFileOps for DirectoryInode {
+    /// Replaces the permission bits with `perm` and refreshes `ctime`.
+    ///
+    /// Fails only if the superblock can no longer be acquired.
+    async fn chmod(&self, perm: Permission) -> KResult<()> {
+        let _sb = self.sb.get()?;
+
+        let mut attributes = self.info.lock();
+        attributes.perm = perm;
+        attributes.ctime = Instant::now();
+        drop(attributes);
+
+        Ok(())
+    }
+}

+ 298 - 0
src/fs/tmpfs/file.rs

@@ -0,0 +1,298 @@
+use alloc::sync::Arc;
+use eonix_mm::paging::PAGE_SIZE;
+use eonix_sync::{RwLock, Spin};
+
+use crate::{
+    io::{Buffer, Stream},
+    kernel::{
+        mem::{CachePage, CachePageStream, PageCache, PageCacheBackendOps},
+        timer::Instant,
+        vfs::{
+            inode::{Ino, InodeDirOps, InodeFileOps, InodeInfo, InodeOps, InodeUse, WriteOffset},
+            types::{DeviceId, Format, Mode, Permission},
+            SbRef,
+        },
+    },
+    prelude::KResult,
+};
+
+use super::TmpFs;
+
+/// A tmpfs regular file whose data lives in a [`PageCache`].
+pub struct FileInode {
+    /// Reference to the owning [`TmpFs`] superblock.
+    sb: SbRef<TmpFs>,
+    /// Inode number of this file.
+    ino: Ino,
+    /// Generic inode attributes, guarded by a spinlock.
+    info: Spin<InodeInfo>,
+    /// Taken shared by `read` and exclusively by `write`/`truncate`.
+    rwsem: RwLock<()>,
+    /// Page cache holding the file contents.
+    pages: PageCache,
+}
+
+impl FileInode {
+    /// Creates a regular file inode numbered `ino` on superblock `sb`, with
+    /// an initial recorded size of `size` bytes and permission bits `perm`.
+    ///
+    /// The inode starts with a link count of 1, is owned by uid/gid 0, and
+    /// has all three timestamps set to the creation time.
+    pub fn new(ino: Ino, sb: SbRef<TmpFs>, size: usize, perm: Permission) -> InodeUse<Self> {
+        let created_at = Instant::now();
+
+        // The page cache needs a weak back-reference to its owning inode,
+        // hence the cyclic constructor.
+        InodeUse::new_cyclic(|weak| {
+            let attributes = InodeInfo {
+                size: size as _,
+                nlink: 1,
+                uid: 0,
+                gid: 0,
+                perm,
+                atime: created_at,
+                ctime: created_at,
+                mtime: created_at,
+            };
+
+            Self {
+                sb,
+                ino,
+                info: Spin::new(attributes),
+                rwsem: RwLock::new(()),
+                pages: PageCache::new(weak.clone() as _),
+            }
+        })
+    }
+}
+
+/// Page cache backend for tmpfs files.
+///
+/// Both transfer hooks are no-ops that report a whole page as processed;
+/// presumably because tmpfs has no backing store and the cached pages are
+/// the file data themselves.
+impl PageCacheBackendOps for FileInode {
+    /// Leaves `_cache_page` untouched and reports `PAGE_SIZE` bytes read.
+    async fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult<usize> {
+        Ok(PAGE_SIZE)
+    }
+
+    /// Writes nothing anywhere and reports `PAGE_SIZE` bytes written.
+    async fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
+        Ok(PAGE_SIZE)
+    }
+
+    /// Returns the file size currently recorded in the inode attributes.
+    fn size(&self) -> usize {
+        self.info.lock().size as usize
+    }
+}
+
+/// Generic inode attribute accessors for a tmpfs regular file.
+impl InodeOps for FileInode {
+    type SuperBlock = TmpFs;
+
+    /// Returns the inode number of this file.
+    fn ino(&self) -> Ino {
+        self.ino
+    }
+
+    /// Always reports [`Format::REG`].
+    fn format(&self) -> Format {
+        Format::REG
+    }
+
+    /// Returns the spinlock-protected inode attributes.
+    fn info(&self) -> &Spin<InodeInfo> {
+        &self.info
+    }
+
+    /// Returns the reference to the owning superblock.
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
+    }
+
+    /// Returns the page cache that holds the file contents.
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.pages)
+    }
+}
+
+// Regular files override no directory operation; the trait's provided defaults apply.
+impl InodeDirOps for FileInode {}
+/// File operations of a tmpfs regular file, backed by its page cache.
+impl InodeFileOps for FileInode {
+    /// Reads from the page cache at byte `offset` into `buffer` while
+    /// holding `rwsem` in shared mode.
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let _shared = self.rwsem.read().await;
+        self.pages.read(buffer, offset).await
+    }
+
+    /// Writes `stream` into the page cache and returns the number of bytes
+    /// written.
+    ///
+    /// With `WriteOffset::Position` the data goes to the given offset. With
+    /// `WriteOffset::End` it is appended at the current file size and the
+    /// resulting end position is stored through the carried reference.
+    /// Afterwards `mtime`/`ctime` are refreshed and the recorded size grows
+    /// to cover the written range (it never shrinks here).
+    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
+        let _exclusive = self.rwsem.write().await;
+
+        let (offset, end_slot) = match offset {
+            WriteOffset::Position(position) => (position, None),
+            // `info.size` won't change since we are holding the write lock.
+            WriteOffset::End(slot) => (self.info.lock().size as usize, Some(slot)),
+        };
+
+        let wrote = self.pages.write(stream, offset).await?;
+        let cursor_end = offset + wrote;
+
+        if let Some(slot) = end_slot {
+            *slot = cursor_end;
+        }
+
+        let stamp = Instant::now();
+        let mut attributes = self.info.lock();
+        attributes.mtime = stamp;
+        attributes.ctime = stamp;
+        attributes.size = attributes.size.max(cursor_end as u64);
+        drop(attributes);
+
+        Ok(wrote)
+    }
+
+    /// Resizes the page cache to `length` bytes, then records the new size
+    /// and refreshes `mtime`/`ctime`.
+    async fn truncate(&self, length: usize) -> KResult<()> {
+        let _exclusive = self.rwsem.write().await;
+
+        self.pages.resize(length).await?;
+
+        let stamp = Instant::now();
+        let mut attributes = self.info.lock();
+        attributes.mtime = stamp;
+        attributes.ctime = stamp;
+        attributes.size = length as u64;
+        drop(attributes);
+
+        Ok(())
+    }
+
+    /// Replaces the permission bits with `perm` and refreshes `ctime`.
+    ///
+    /// Fails only if the superblock can no longer be acquired.
+    async fn chmod(&self, perm: Permission) -> KResult<()> {
+        let _sb = self.sb.get()?;
+
+        let mut attributes = self.info.lock();
+        attributes.perm = perm;
+        attributes.ctime = Instant::now();
+        drop(attributes);
+
+        Ok(())
+    }
+}
+
+/// A tmpfs device node (character or block device).
+pub struct DeviceInode {
+    /// Reference to the owning [`TmpFs`] superblock.
+    sb: SbRef<TmpFs>,
+    /// Inode number of this node.
+    ino: Ino,
+    /// Generic inode attributes, guarded by a spinlock.
+    info: Spin<InodeInfo>,
+    /// `true` for a block device node, `false` for a character device node.
+    is_block: bool,
+    /// Identifier of the device this node refers to.
+    devid: DeviceId,
+}
+
+impl DeviceInode {
+    /// Creates a device node numbered `ino` on superblock `sb` that refers
+    /// to device `devid`.
+    ///
+    /// The permission bits are the non-format bits of `mode`; the node is a
+    /// block device when the format of `mode` is `Format::BLK` and a
+    /// character device otherwise.
+    pub fn new(ino: Ino, sb: SbRef<TmpFs>, mode: Mode, devid: DeviceId) -> InodeUse<Self> {
+        let created_at = Instant::now();
+
+        let attributes = InodeInfo {
+            size: 0,
+            nlink: 1,
+            uid: 0,
+            gid: 0,
+            perm: Permission::new(mode.non_format_bits()),
+            atime: created_at,
+            ctime: created_at,
+            mtime: created_at,
+        };
+        let is_block = mode.format() == Format::BLK;
+
+        InodeUse::new(Self {
+            sb,
+            ino,
+            info: Spin::new(attributes),
+            is_block,
+            devid,
+        })
+    }
+}
+
+/// Generic inode attribute accessors for a tmpfs device node.
+impl InodeOps for DeviceInode {
+    type SuperBlock = TmpFs;
+
+    /// Returns the inode number of this node.
+    fn ino(&self) -> Ino {
+        self.ino
+    }
+
+    /// Reports `Format::BLK` for block device nodes, `Format::CHR` otherwise.
+    fn format(&self) -> Format {
+        match self.is_block {
+            true => Format::BLK,
+            false => Format::CHR,
+        }
+    }
+
+    /// Returns the spinlock-protected inode attributes.
+    fn info(&self) -> &Spin<InodeInfo> {
+        &self.info
+    }
+
+    /// Returns the reference to the owning superblock.
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
+    }
+
+    /// Device nodes have no page cache.
+    fn page_cache(&self) -> Option<&PageCache> {
+        None
+    }
+}
+
+// Device nodes override no directory operation; the trait's provided defaults apply.
+impl InodeDirOps for DeviceInode {}
+/// File-style operations supported on a tmpfs device node.
+impl InodeFileOps for DeviceInode {
+    /// Replaces the permission bits with `perm` and refreshes `ctime`.
+    ///
+    /// Fails only if the superblock can no longer be acquired.
+    async fn chmod(&self, perm: Permission) -> KResult<()> {
+        let _sb = self.sb.get()?;
+
+        let mut attributes = self.info.lock();
+        attributes.perm = perm;
+        attributes.ctime = Instant::now();
+        drop(attributes);
+
+        Ok(())
+    }
+
+    /// Returns the identifier of the device this node refers to.
+    fn devid(&self) -> KResult<DeviceId> {
+        let id = self.devid;
+        Ok(id)
+    }
+}
+
+/// A tmpfs symbolic link.
+pub struct SymlinkInode {
+    /// Reference to the owning [`TmpFs`] superblock.
+    sb: SbRef<TmpFs>,
+    /// Inode number of this link.
+    ino: Ino,
+    /// Generic inode attributes, guarded by a spinlock.
+    info: Spin<InodeInfo>,
+    /// Raw bytes of the link target, returned by `readlink`.
+    target: Arc<[u8]>,
+}
+
+impl SymlinkInode {
+    /// Creates a symbolic link inode numbered `ino` on superblock `sb` that
+    /// points at `target`.
+    ///
+    /// The recorded size is the length of `target` in bytes and the
+    /// permission bits are fixed at `0o777`.
+    pub fn new(ino: Ino, sb: SbRef<TmpFs>, target: Arc<[u8]>) -> InodeUse<Self> {
+        let created_at = Instant::now();
+
+        // Capture the length before `target` is moved into the inode.
+        let attributes = InodeInfo {
+            size: target.len() as _,
+            nlink: 1,
+            uid: 0,
+            gid: 0,
+            perm: Permission::new(0o777),
+            atime: created_at,
+            ctime: created_at,
+            mtime: created_at,
+        };
+
+        InodeUse::new(Self {
+            sb,
+            ino,
+            info: Spin::new(attributes),
+            target,
+        })
+    }
+}
+
+// Symlinks override no directory operation; the trait's provided defaults apply.
+impl InodeDirOps for SymlinkInode {}
+/// Generic inode attribute accessors for a tmpfs symbolic link.
+impl InodeOps for SymlinkInode {
+    type SuperBlock = TmpFs;
+
+    /// Returns the inode number of this link.
+    fn ino(&self) -> Ino {
+        self.ino
+    }
+
+    /// Always reports [`Format::LNK`].
+    fn format(&self) -> Format {
+        Format::LNK
+    }
+
+    /// Returns the spinlock-protected inode attributes.
+    fn info(&self) -> &Spin<InodeInfo> {
+        &self.info
+    }
+
+    /// Returns the reference to the owning superblock.
+    fn super_block(&self) -> &SbRef<Self::SuperBlock> {
+        &self.sb
+    }
+
+    /// Symlinks have no page cache.
+    fn page_cache(&self) -> Option<&PageCache> {
+        None
+    }
+}
+
+/// File-style operations supported on a tmpfs symbolic link.
+impl InodeFileOps for SymlinkInode {
+    /// Copies the link target into `buffer`; a buffer too small for the
+    /// whole target receives a partial copy rather than an error.
+    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+        let filled = buffer.fill(self.target.as_ref())?;
+        Ok(filled.allow_partial())
+    }
+}

+ 73 - 0
src/fs/tmpfs/mod.rs

@@ -0,0 +1,73 @@
+mod dir;
+mod file;
+
+use crate::kernel::vfs::inode::{Ino, InodeUse};
+use crate::kernel::vfs::types::{DeviceId, Permission};
+use crate::kernel::vfs::{SbRef, SbUse, SuperBlock, SuperBlockInfo};
+use crate::{
+    kernel::vfs::{
+        dentry::Dentry,
+        mount::{register_filesystem, Mount, MountCreator},
+    },
+    prelude::*,
+};
+use alloc::sync::Arc;
+use async_trait::async_trait;
+use core::sync::atomic::AtomicU64;
+use core::sync::atomic::Ordering;
+use dir::DirectoryInode;
+use eonix_sync::Mutex;
+
+/// Filesystem-wide state of one tmpfs instance.
+pub struct TmpFs {
+    /// Next inode number to hand out; see `assign_ino`.
+    next_ino: AtomicU64,
+    /// Held for the whole duration of every rename on this filesystem.
+    rename_lock: Mutex<()>,
+}
+
+// Marks `TmpFs` as a superblock backend; nothing is overridden here.
+impl SuperBlock for TmpFs {}
+
+impl TmpFs {
+    /// Hands out the next unused inode number.
+    fn assign_ino(&self) -> Ino {
+        let raw = self.next_ino.fetch_add(1, Ordering::Relaxed);
+        Ino::new(raw)
+    }
+
+    /// Builds a fresh tmpfs instance and returns it with its root directory.
+    ///
+    /// Inode numbers start at 1, so the root directory gets number 1. The
+    /// root is created with permission `0o755`.
+    ///
+    /// NOTE(review): `read_only` is hard-coded to `false` here, so a
+    /// read-only mount request cannot be honoured; confirm this is intended.
+    fn create() -> KResult<(SbUse<TmpFs>, InodeUse<DirectoryInode>)> {
+        let sb_info = SuperBlockInfo {
+            io_blksize: 4096,
+            device_id: DeviceId::new(0, 2),
+            read_only: false,
+        };
+        let backend = Self {
+            next_ino: AtomicU64::new(1),
+            rename_lock: Mutex::new(()),
+        };
+        let tmpfs = SbUse::new(sb_info, backend);
+
+        let root_ino = tmpfs.backend.assign_ino();
+        let root_dir = DirectoryInode::new(root_ino, SbRef::from(&tmpfs), Permission::new(0o755));
+
+        Ok((tmpfs, root_dir))
+    }
+}
+
+/// Stateless factory registered under the name "tmpfs" to create tmpfs mounts.
+struct TmpFsMountCreator;
+
+#[async_trait]
+impl MountCreator for TmpFsMountCreator {
+    /// Creates a brand-new tmpfs instance and mounts its root at `mp`.
+    ///
+    /// The source string is ignored. NOTE(review): `_flags` is ignored as
+    /// well, so mount flags such as a read-only request have no effect;
+    /// confirm this is intended.
+    async fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
+        let (superblock, root) = TmpFs::create()?;
+        Mount::new(mp, superblock, root)
+    }
+
+    /// tmpfs has no on-disk signature, so every input is accepted.
+    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
+        Ok(true)
+    }
+}
+
+/// Registers the tmpfs mount creator under the name "tmpfs".
+///
+/// # Panics
+///
+/// Panics if the registration fails.
+pub fn init() {
+    let creator = Arc::new(TmpFsMountCreator);
+    register_filesystem("tmpfs", creator).unwrap();
+}

+ 18 - 20
src/kernel/block.rs

@@ -3,7 +3,7 @@ mod mbr;
 use super::{
     constants::ENOENT,
     mem::{paging::Page, AsMemoryBlock as _},
-    vfs::DevId,
+    vfs::types::DeviceId,
 };
 use crate::kernel::constants::{EEXIST, EINVAL};
 use crate::{
@@ -14,13 +14,10 @@ use alloc::{
     collections::btree_map::{BTreeMap, Entry},
     sync::Arc,
 };
+use async_trait::async_trait;
 use core::cmp::Ordering;
 use mbr::MBRPartTable;
 
-pub fn make_device(major: u32, minor: u32) -> DevId {
-    (major << 8) & 0xff00u32 | minor & 0xffu32
-}
-
 pub struct Partition {
     pub lba_offset: u64,
     pub sector_count: u64,
@@ -30,11 +27,12 @@ pub trait PartTable {
     fn partitions(&self) -> impl Iterator<Item = Partition> + use<'_, Self>;
 }
 
+#[async_trait]
 pub trait BlockRequestQueue: Send + Sync {
     /// Maximum number of sectors that can be read in one request
     fn max_request_pages(&self) -> u64;
 
-    fn submit(&self, req: BlockDeviceRequest) -> KResult<()>;
+    async fn submit<'a>(&'a self, req: BlockDeviceRequest<'a>) -> KResult<()>;
 }
 
 enum BlockDeviceType {
@@ -42,7 +40,7 @@ enum BlockDeviceType {
         queue: Arc<dyn BlockRequestQueue>,
     },
     Partition {
-        disk_dev: DevId,
+        disk_dev: DeviceId,
         lba_offset: u64,
         queue: Arc<dyn BlockRequestQueue>,
     },
@@ -50,7 +48,7 @@ enum BlockDeviceType {
 
 pub struct BlockDevice {
     /// Unique device identifier, major and minor numbers
-    devid: DevId,
+    devid: DeviceId,
     /// Total size of the device in sectors (512 bytes each)
     sector_count: u64,
 
@@ -77,11 +75,11 @@ impl Ord for BlockDevice {
     }
 }
 
-static BLOCK_DEVICE_LIST: Spin<BTreeMap<DevId, Arc<BlockDevice>>> = Spin::new(BTreeMap::new());
+static BLOCK_DEVICE_LIST: Spin<BTreeMap<DeviceId, Arc<BlockDevice>>> = Spin::new(BTreeMap::new());
 
 impl BlockDevice {
     pub fn register_disk(
-        devid: DevId,
+        devid: DeviceId,
         size: u64,
         queue: Arc<dyn BlockRequestQueue>,
     ) -> KResult<Arc<Self>> {
@@ -97,13 +95,13 @@ impl BlockDevice {
         }
     }
 
-    pub fn get(devid: DevId) -> KResult<Arc<Self>> {
+    pub fn get(devid: DeviceId) -> KResult<Arc<Self>> {
         BLOCK_DEVICE_LIST.lock().get(&devid).cloned().ok_or(ENOENT)
     }
 }
 
 impl BlockDevice {
-    pub fn devid(&self) -> DevId {
+    pub fn devid(&self) -> DeviceId {
         self.devid
     }
 
@@ -121,7 +119,7 @@ impl BlockDevice {
         };
 
         let device = Arc::new(BlockDevice {
-            devid: make_device(self.devid >> 8, (self.devid & 0xff) + idx as u32 + 1),
+            devid: DeviceId::new(self.devid.major, self.devid.minor + idx as u16 + 1),
             sector_count: size,
             dev_type: BlockDeviceType::Partition {
                 disk_dev: self.devid,
@@ -159,7 +157,7 @@ impl BlockDevice {
     /// - `req.sector` must be within the disk size
     /// - `req.buffer` must be enough to hold the data
     ///
-    pub fn commit_request(&self, mut req: BlockDeviceRequest) -> KResult<()> {
+    pub async fn commit_request(&self, mut req: BlockDeviceRequest<'_>) -> KResult<()> {
         // Verify the request parameters.
         match &mut req {
             BlockDeviceRequest::Read { sector, count, .. } => {
@@ -184,7 +182,7 @@ impl BlockDevice {
             }
         }
 
-        self.queue().submit(req)
+        self.queue().submit(req).await
     }
 
     /// Read some from the block device, may involve some copy and fragmentation
@@ -194,7 +192,7 @@ impl BlockDevice {
     /// # Arguments
     /// `offset` - offset in bytes
     ///
-    pub fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult<FillResult> {
+    pub async fn read_some(&self, offset: usize, buffer: &mut dyn Buffer) -> KResult<FillResult> {
         let mut sector_start = offset as u64 / 512;
         let mut first_sector_offset = offset as u64 % 512;
         let mut sector_count = (first_sector_offset + buffer.total() as u64 + 511) / 512;
@@ -241,7 +239,7 @@ impl BlockDevice {
                 buffer: &pages,
             };
 
-            self.commit_request(req)?;
+            self.commit_request(req).await?;
 
             for page in pages.iter() {
                 // SAFETY: We are the only owner of the page so no one could be mutating it.
@@ -277,7 +275,7 @@ impl BlockDevice {
     /// `offset` - offset in bytes
     /// `data` - data to write
     ///
-    pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult<usize> {
+    pub async fn write_some(&self, offset: usize, data: &[u8]) -> KResult<usize> {
         let mut sector_start = offset as u64 / 512;
         let mut first_sector_offset = offset as u64 % 512;
         let mut remaining_data = data;
@@ -320,7 +318,7 @@ impl BlockDevice {
                     count: sector_count,
                     buffer: pages,
                 };
-                self.commit_request(read_req)?;
+                self.commit_request(read_req).await?;
             }
 
             let mut data_offset = 0;
@@ -356,7 +354,7 @@ impl BlockDevice {
                 count: sector_count,
                 buffer: pages,
             };
-            self.commit_request(write_req)?;
+            self.commit_request(write_req).await?;
 
             let bytes_written = data_offset;
             nwritten += bytes_written;

+ 1 - 1
src/kernel/block/mbr.rs

@@ -31,7 +31,7 @@ pub struct MBRPartTable {
 impl MBRPartTable {
     pub async fn from_disk(disk: &BlockDevice) -> KResult<Self> {
         let mut mbr: UninitBuffer<MBRData> = UninitBuffer::new();
-        disk.read_some(0, &mut mbr)?.ok_or(EIO)?;
+        disk.read_some(0, &mut mbr).await?.ok_or(EIO)?;
         let mbr = mbr.assume_init()?;
 
         if mbr.magic != [0x55, 0xaa] {

+ 7 - 8
src/kernel/chardev.rs

@@ -1,10 +1,9 @@
 use super::{
-    block::make_device,
     console::get_console,
     constants::{EEXIST, EIO},
     task::{block_on, ProcessList, Thread},
     terminal::Terminal,
-    vfs::{DevId, File, FileType, TerminalFile},
+    vfs::{types::DeviceId, File, FileType, TerminalFile},
 };
 use crate::{
     io::{Buffer, Stream, StreamRead},
@@ -34,7 +33,7 @@ pub struct CharDevice {
     device: CharDeviceType,
 }
 
-static CHAR_DEVICES: Spin<BTreeMap<DevId, Arc<CharDevice>>> = Spin::new(BTreeMap::new());
+static CHAR_DEVICES: Spin<BTreeMap<DeviceId, Arc<CharDevice>>> = Spin::new(BTreeMap::new());
 
 impl CharDevice {
     pub fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
@@ -54,11 +53,11 @@ impl CharDevice {
         }
     }
 
-    pub fn get(devid: DevId) -> Option<Arc<CharDevice>> {
+    pub fn get(devid: DeviceId) -> Option<Arc<CharDevice>> {
         CHAR_DEVICES.lock().get(&devid).cloned()
     }
 
-    pub fn register(devid: DevId, name: Arc<str>, device: CharDeviceType) -> KResult<()> {
+    pub fn register(devid: DeviceId, name: Arc<str>, device: CharDeviceType) -> KResult<()> {
         match CHAR_DEVICES.lock().entry(devid) {
             Entry::Vacant(entry) => {
                 entry.insert(Arc::new(CharDevice { name, device }));
@@ -134,19 +133,19 @@ impl VirtualCharDevice for ConsoleDevice {
 impl CharDevice {
     pub fn init() -> KResult<()> {
         Self::register(
-            make_device(1, 3),
+            DeviceId::new(1, 3),
             Arc::from("null"),
             CharDeviceType::Virtual(Box::new(NullDevice)),
         )?;
 
         Self::register(
-            make_device(1, 5),
+            DeviceId::new(1, 5),
             Arc::from("zero"),
             CharDeviceType::Virtual(Box::new(ZeroDevice)),
         )?;
 
         Self::register(
-            make_device(5, 1),
+            DeviceId::new(5, 1),
             Arc::from("console"),
             CharDeviceType::Virtual(Box::new(ConsoleDevice)),
         )?;

+ 1 - 1
src/kernel/mem.rs

@@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
-pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend};
+pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackendOps};
 pub use paging::{Page, PageBuffer};

+ 3 - 12
src/kernel/mem/mm_list/mapping.rs

@@ -1,24 +1,15 @@
-use core::fmt::Debug;
-
-use crate::kernel::vfs::inode::Inode;
-use alloc::sync::Arc;
+use crate::kernel::vfs::inode::{Inode, InodeUse};
 use eonix_mm::paging::PAGE_SIZE;
 
 #[derive(Debug, Clone)]
 pub struct FileMapping {
-    pub file: Arc<dyn Inode>,
+    pub file: InodeUse<dyn Inode>,
     /// Offset in the file, aligned to 4KB boundary.
     pub offset: usize,
     /// Length of the mapping. Exceeding part will be zeroed.
     pub length: usize,
 }
 
-impl Debug for dyn Inode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "Inode()")
-    }
-}
-
 #[derive(Debug, Clone)]
 pub enum Mapping {
     // private anonymous memory
@@ -28,7 +19,7 @@ pub enum Mapping {
 }
 
 impl FileMapping {
-    pub fn new(file: Arc<dyn Inode>, offset: usize, length: usize) -> Self {
+    pub fn new(file: InodeUse<dyn Inode>, offset: usize, length: usize) -> Self {
         assert_eq!(offset & (PAGE_SIZE - 1), 0);
         Self {
             file,

+ 47 - 8
src/kernel/mem/page_cache.rs

@@ -6,8 +6,10 @@ use crate::{
     GlobalPageAlloc,
 };
 use align_ext::AlignExt;
+use alloc::boxed::Box;
 use alloc::{collections::btree_map::BTreeMap, sync::Weak};
-use core::mem::ManuallyDrop;
+use async_trait::async_trait;
+use core::{future::Future, mem::ManuallyDrop};
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::{
     address::{PAddr, PhysAccess},
@@ -159,7 +161,8 @@ impl PageCache {
                     self.backend
                         .upgrade()
                         .unwrap()
-                        .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                        .read_page(&mut new_page, offset.align_down(PAGE_SIZE))
+                        .await?;
                     pages.insert(page_id, new_page);
                 }
             }
@@ -205,7 +208,8 @@ impl PageCache {
                         self.backend
                             .upgrade()
                             .unwrap()
-                            .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                            .read_page(&mut new_page, offset.align_down(PAGE_SIZE))
+                            .await?;
                         new_page
                     };
 
@@ -224,7 +228,8 @@ impl PageCache {
                 self.backend
                     .upgrade()
                     .unwrap()
-                    .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?;
+                    .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)
+                    .await?;
                 page.clear_dirty();
             }
         }
@@ -286,7 +291,8 @@ impl PageCache {
                 self.backend
                     .upgrade()
                     .unwrap()
-                    .read_page(&mut new_page, offset_aligin)?;
+                    .read_page(&mut new_page, offset_aligin)
+                    .await?;
                 pages.insert(page_id, new_page);
                 new_page.0
             }
@@ -349,14 +355,47 @@ impl Stream for CachePageStream {
 // for fs, offset is file offset (floor align to PAGE_SIZE)
 // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE)
 // Oh no, this would make unnecessary cache
-pub trait PageCacheBackend {
-    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
+pub trait PageCacheBackendOps: Sized {
+    fn read_page(
+        &self,
+        page: &mut CachePage,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send;
+
+    fn write_page(
+        &self,
+        page: &mut CachePageStream,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send;
 
-    fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize>;
+    fn size(&self) -> usize;
+}
 
+#[async_trait]
+pub trait PageCacheBackend: Send + Sync {
+    async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
+    async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize>;
     fn size(&self) -> usize;
 }
 
+#[async_trait]
+impl<T> PageCacheBackend for T
+where
+    T: PageCacheBackendOps + Send + Sync + 'static,
+{
+    async fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
+        self.read_page(page, offset).await
+    }
+
+    async fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize> {
+        self.write_page(page, offset).await
+    }
+
+    fn size(&self) -> usize {
+        self.size()
+    }
+}
+
 pub trait PageCacheRawPage: RawPage {
     fn valid_size(&self) -> &mut usize;
 

+ 6 - 3
src/kernel/pcie/driver.rs

@@ -4,21 +4,24 @@ use super::{
 };
 use crate::{kernel::constants::EEXIST, KResult};
 use alloc::{
+    boxed::Box,
     collections::btree_map::{self, BTreeMap},
     sync::Arc,
 };
+use async_trait::async_trait;
 use eonix_sync::Spin;
 
 static PCIE_DRIVERS: Spin<BTreeMap<u32, Arc<dyn PCIDriver>>> = Spin::new(BTreeMap::new());
 
+#[async_trait]
 pub trait PCIDriver: Send + Sync {
     fn vendor_id(&self) -> u16;
     fn device_id(&self) -> u16;
 
-    fn handle_device(&self, device: Arc<PCIDevice<'static>>) -> Result<(), PciError>;
+    async fn handle_device(&self, device: Arc<PCIDevice<'static>>) -> Result<(), PciError>;
 }
 
-pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> {
+pub async fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> {
     let index = (driver.vendor_id() as u32) << 16 | driver.device_id() as u32;
 
     let driver = Arc::new(driver);
@@ -31,7 +34,7 @@ pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> {
     let devices = PCIE_DEVICES.lock().get(&index).cloned();
     if let Some(devices) = devices {
         for device in devices {
-            driver.handle_device(device)?;
+            driver.handle_device(device).await?;
         }
     };
 

+ 56 - 38
src/kernel/syscall/file_rw.rs

@@ -7,7 +7,7 @@ use crate::kernel::syscall::UserMut;
 use crate::kernel::task::Thread;
 use crate::kernel::timer::sleep;
 use crate::kernel::vfs::filearray::FD;
-use crate::kernel::vfs::inode::Mode;
+use crate::kernel::vfs::types::{DeviceId, Mode};
 use crate::kernel::vfs::{PollEvent, SeekOption};
 use crate::{
     io::{Buffer, BufferFill},
@@ -41,7 +41,7 @@ impl FromSyscallArg for AtFlags {
     }
 }
 
-fn dentry_from(
+async fn dentry_from(
     thread: &Thread,
     dirfd: FD,
     pathname: User<u8>,
@@ -52,7 +52,7 @@ fn dentry_from(
     match (path.as_cstr().to_bytes_with_nul()[0], dirfd) {
         (b'/', _) | (_, FD::AT_FDCWD) => {
             let path = Path::new(path.as_cstr().to_bytes())?;
-            Dentry::open(&thread.fs_context, path, follow_symlink)
+            Dentry::open(&thread.fs_context, path, follow_symlink).await
         }
         (0, dirfd) => {
             let dir_file = thread.files.get(dirfd).ok_or(EBADF)?;
@@ -63,7 +63,7 @@ fn dentry_from(
             let dir_file = thread.files.get(dirfd).ok_or(EBADF)?;
             let dir_dentry = dir_file.as_path().ok_or(ENOTDIR)?;
 
-            Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink)
+            Dentry::open_at(&thread.fs_context, dir_dentry, path, follow_symlink).await
         }
     }
 }
@@ -119,13 +119,11 @@ async fn pwrite64(fd: FD, buffer: User<u8>, count: usize, offset: usize) -> KRes
 }
 
 #[eonix_macros::define_syscall(SYS_OPENAT)]
-async fn openat(dirfd: FD, pathname: User<u8>, flags: OpenFlags, mut mode: Mode) -> KResult<FD> {
-    let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink())?;
+async fn openat(dirfd: FD, pathname: User<u8>, flags: OpenFlags, mode: Mode) -> KResult<FD> {
+    let dentry = dentry_from(thread, dirfd, pathname, flags.follow_symlink()).await?;
+    let perm = mode.perm().mask_with(*thread.fs_context.umask.lock());
 
-    let umask = *thread.fs_context.umask.lock();
-    mode.mask_perm(!umask.non_format_bits());
-
-    thread.files.open(&dentry, flags, mode)
+    thread.files.open(&dentry, flags, perm).await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -206,7 +204,7 @@ async fn newfstatat(
         let file = thread.files.get(dirfd).ok_or(EBADF)?;
         file.as_path().ok_or(EBADF)?.clone()
     } else {
-        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+        dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?
     };
 
     let statbuf = UserPointerMut::new(statbuf)?;
@@ -247,7 +245,7 @@ async fn statx(
         let file = thread.files.get(dirfd).ok_or(EBADF)?;
         file.as_path().ok_or(EBADF)?.clone()
     } else {
-        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+        dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?
     };
 
     dentry.statx(&mut statx, mask)?;
@@ -257,12 +255,11 @@ async fn statx(
 }
 
 #[eonix_macros::define_syscall(SYS_MKDIRAT)]
-async fn mkdirat(dirfd: FD, pathname: User<u8>, mut mode: Mode) -> KResult<()> {
-    let umask = *thread.fs_context.umask.lock();
-    mode.mask_perm(!umask.non_format_bits());
+async fn mkdirat(dirfd: FD, pathname: User<u8>, mode: Mode) -> KResult<()> {
+    let dentry = dentry_from(thread, dirfd, pathname, true).await?;
+    let perm = mode.perm().mask_with(*thread.fs_context.umask.lock());
 
-    let dentry = dentry_from(thread, dirfd, pathname, true)?;
-    dentry.mkdir(mode)
+    dentry.mkdir(perm).await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -274,7 +271,7 @@ async fn mkdir(pathname: User<u8>, mode: u32) -> KResult<()> {
 #[eonix_macros::define_syscall(SYS_FTRUNCATE64)]
 async fn truncate64(fd: FD, length: usize) -> KResult<()> {
     let file = thread.files.get(fd).ok_or(EBADF)?;
-    file.as_path().ok_or(EBADF)?.truncate(length)
+    file.as_path().ok_or(EBADF)?.truncate(length).await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -290,7 +287,10 @@ async fn truncate(pathname: User<u8>, length: usize) -> KResult<()> {
 
 #[eonix_macros::define_syscall(SYS_UNLINKAT)]
 async fn unlinkat(dirfd: FD, pathname: User<u8>) -> KResult<()> {
-    dentry_from(thread, dirfd, pathname, false)?.unlink()
+    dentry_from(thread, dirfd, pathname, false)
+        .await?
+        .unlink()
+        .await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -302,9 +302,9 @@ async fn unlink(pathname: User<u8>) -> KResult<()> {
 #[eonix_macros::define_syscall(SYS_SYMLINKAT)]
 async fn symlinkat(target: User<u8>, dirfd: FD, linkpath: User<u8>) -> KResult<()> {
     let target = UserString::new(target)?;
-    let dentry = dentry_from(thread, dirfd, linkpath, false)?;
+    let dentry = dentry_from(thread, dirfd, linkpath, false).await?;
 
-    dentry.symlink(target.as_cstr().to_bytes())
+    dentry.symlink(target.as_cstr().to_bytes()).await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -313,18 +313,36 @@ async fn symlink(target: User<u8>, linkpath: User<u8>) -> KResult<()> {
     sys_symlinkat(thread, target, FD::AT_FDCWD, linkpath)
 }
 
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+struct UserDeviceId(u32);
+
+impl FromSyscallArg for UserDeviceId {
+    fn from_arg(value: usize) -> Self {
+        Self(value as u32)
+    }
+}
+
+impl UserDeviceId {
+    pub fn into_devid(self) -> DeviceId {
+        let major = (self.0 >> 8) & 0xfff;
+        let minor = (self.0 & 0xff) | ((self.0 >> 12) & 0xfff00);
+
+        // TODO: We strip off the high 4 bits of the minor ID for now...
+        DeviceId::new(major as u16, minor as u16)
+    }
+}
+
 #[eonix_macros::define_syscall(SYS_MKNODAT)]
-async fn mknodat(dirfd: FD, pathname: User<u8>, mut mode: Mode, dev: u32) -> KResult<()> {
+async fn mknodat(dirfd: FD, pathname: User<u8>, mut mode: Mode, dev: UserDeviceId) -> KResult<()> {
     if !mode.is_blk() && !mode.is_chr() {
         return Err(EINVAL);
     }
 
-    let dentry = dentry_from(thread, dirfd, pathname, true)?;
-
-    let umask = *thread.fs_context.umask.lock();
-    mode.mask_perm(!umask.non_format_bits());
+    let dentry = dentry_from(thread, dirfd, pathname, true).await?;
+    mode.set_perm(mode.perm().mask_with(*thread.fs_context.umask.lock()));
 
-    dentry.mknod(mode, dev)
+    dentry.mknod(mode, dev.into_devid()).await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -340,10 +358,10 @@ async fn readlinkat(
     buffer: UserMut<u8>,
     bufsize: usize,
 ) -> KResult<usize> {
-    let dentry = dentry_from(thread, dirfd, pathname, false)?;
+    let dentry = dentry_from(thread, dirfd, pathname, false).await?;
     let mut buffer = UserBuffer::new(buffer, bufsize)?;
 
-    dentry.readlink(&mut buffer)
+    dentry.readlink(&mut buffer).await
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -471,7 +489,7 @@ async fn faccessat(dirfd: FD, pathname: User<u8>, _mode: u32, flags: AtFlags) ->
         let file = thread.files.get(dirfd).ok_or(EBADF)?;
         file.as_path().ok_or(EBADF)?.clone()
     } else {
-        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+        dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?
     };
 
     if !dentry.is_valid() {
@@ -614,12 +632,12 @@ async fn fchownat(
     gid: u32,
     flags: AtFlags,
 ) -> KResult<()> {
-    let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow())?;
+    let dentry = dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?;
     if !dentry.is_valid() {
         return Err(ENOENT);
     }
 
-    dentry.chown(uid, gid)
+    dentry.chown(uid, gid).await
 }
 
 #[eonix_macros::define_syscall(SYS_FCHMODAT)]
@@ -628,14 +646,14 @@ async fn fchmodat(dirfd: FD, pathname: User<u8>, mode: Mode, flags: AtFlags) ->
         let file = thread.files.get(dirfd).ok_or(EBADF)?;
         file.as_path().ok_or(EBADF)?.clone()
     } else {
-        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+        dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?
     };
 
     if !dentry.is_valid() {
         return Err(ENOENT);
     }
 
-    dentry.chmod(mode)
+    dentry.chmod(mode).await
 }
 
 #[eonix_macros::define_syscall(SYS_FCHMOD)]
@@ -654,7 +672,7 @@ async fn utimensat(
         let file = thread.files.get(dirfd).ok_or(EBADF)?;
         file.as_path().ok_or(EBADF)?.clone()
     } else {
-        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+        dentry_from(thread, dirfd, pathname, !flags.no_follow()).await?
     };
 
     if !dentry.is_valid() {
@@ -688,10 +706,10 @@ async fn renameat2(
         Err(EINVAL)?;
     }
 
-    let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false)?;
-    let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false)?;
+    let old_dentry = dentry_from(thread, old_dirfd, old_pathname, false).await?;
+    let new_dentry = dentry_from(thread, new_dirfd, new_pathname, false).await?;
 
-    old_dentry.rename(&new_dentry, flags)
+    old_dentry.rename(&new_dentry, flags).await
 }
 
 #[cfg(target_arch = "x86_64")]

+ 2 - 120
src/kernel/syscall/mm.rs

@@ -1,10 +1,8 @@
 use super::FromSyscallArg;
-use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER};
-use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT};
+use crate::kernel::constants::{EBADF, EINVAL};
 use crate::kernel::mem::FileMapping;
 use crate::kernel::task::Thread;
 use crate::kernel::vfs::filearray::FD;
-use crate::kernel::vfs::inode::Mode;
 use crate::{
     kernel::{
         constants::{UserMmapFlags, UserMmapProtocol},
@@ -66,13 +64,7 @@ async fn do_mmap2(
         if !is_shared {
             Mapping::Anonymous
         } else {
-            // The mode is unimportant here, since we are checking prot in mm_area.
-            let shared_area = SHM_MANAGER.lock().await.create_shared_area(
-                len,
-                thread.process.pid,
-                Mode::REG.perm(0o777),
-            );
-            Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len))
+            unimplemented!("mmap MAP_ANONYMOUS | MAP_SHARED");
         }
     } else {
         let file = thread
@@ -179,114 +171,4 @@ async fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()
         .await
 }
 
-#[eonix_macros::define_syscall(SYS_SHMGET)]
-async fn shmget(key: usize, size: usize, shmflg: u32) -> KResult<u32> {
-    let size = size.align_up(PAGE_SIZE);
-
-    let mut shm_manager = SHM_MANAGER.lock().await;
-    let shmid = gen_shm_id(key)?;
-
-    let mode = Mode::REG.perm(shmflg);
-    let shmflg = ShmFlags::from_bits_truncate(shmflg);
-
-    if key == IPC_PRIVATE {
-        let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode);
-        shm_manager.insert(shmid, new_shm);
-        return Ok(shmid);
-    }
-
-    if let Some(_) = shm_manager.get(shmid) {
-        if shmflg.contains(ShmFlags::IPC_CREAT | ShmFlags::IPC_EXCL) {
-            return Err(EEXIST);
-        }
-
-        return Ok(shmid);
-    }
-
-    if shmflg.contains(ShmFlags::IPC_CREAT) {
-        let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode);
-        shm_manager.insert(shmid, new_shm);
-        return Ok(shmid);
-    }
-
-    Err(ENOENT)
-}
-
-#[eonix_macros::define_syscall(SYS_SHMAT)]
-async fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult<usize> {
-    let mm_list = &thread.process.mm_list;
-    let shm_manager = SHM_MANAGER.lock().await;
-    let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?;
-
-    // Why is this not used?
-    let _mode = shmflg & 0o777;
-    let shmflg = ShmFlags::from_bits_truncate(shmflg);
-
-    let mut permission = Permission {
-        read: true,
-        write: true,
-        execute: false,
-    };
-
-    if shmflg.contains(ShmFlags::SHM_EXEC) {
-        permission.execute = true;
-    }
-    if shmflg.contains(ShmFlags::SHM_RDONLY) {
-        permission.write = false;
-    }
-
-    let size = shm_area.shmid_ds.shm_segsz;
-
-    let mapping = Mapping::File(FileMapping {
-        file: shm_area.area.clone(),
-        offset: 0,
-        length: size,
-    });
-
-    let addr = if addr != 0 {
-        if addr % PAGE_SIZE != 0 && !shmflg.contains(ShmFlags::SHM_RND) {
-            return Err(EINVAL);
-        }
-        let addr = VAddr::from(addr.align_down(PAGE_SIZE));
-        mm_list
-            .mmap_fixed(addr, size, mapping, permission, true)
-            .await
-    } else {
-        mm_list
-            .mmap_hint(VAddr::NULL, size, mapping, permission, true)
-            .await
-    }?;
-
-    thread.process.shm_areas.lock().insert(addr, size);
-
-    Ok(addr.addr())
-}
-
-#[eonix_macros::define_syscall(SYS_SHMDT)]
-async fn shmdt(addr: usize) -> KResult<()> {
-    let addr = VAddr::from(addr);
-
-    let size = {
-        let mut shm_areas = thread.process.shm_areas.lock();
-        let size = *shm_areas.get(&addr).ok_or(EINVAL)?;
-        shm_areas.remove(&addr);
-
-        size
-    };
-
-    thread.process.mm_list.unmap(addr, size).await
-}
-
-#[eonix_macros::define_syscall(SYS_SHMCTL)]
-async fn shmctl(_shmid: u32, _op: i32, _shmid_ds: usize) -> KResult<usize> {
-    // TODO
-    Ok(0)
-}
-
-#[eonix_macros::define_syscall(SYS_MEMBARRIER)]
-async fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> {
-    // TODO
-    Ok(())
-}
-
 pub fn keep_alive() {}

+ 12 - 8
src/kernel/syscall/procops.rs

@@ -16,7 +16,7 @@ use crate::kernel::task::{parse_futexop, CloneArgs};
 use crate::kernel::timer::sleep;
 use crate::kernel::user::UserString;
 use crate::kernel::user::{UserPointer, UserPointerMut};
-use crate::kernel::vfs::inode::Mode;
+use crate::kernel::vfs::types::Permission;
 use crate::kernel::vfs::{self, dentry::Dentry};
 use crate::path::Path;
 use crate::{kernel::user::UserBuffer, prelude::*};
@@ -100,10 +100,11 @@ async fn clock_nanosleep(
 }
 
 #[eonix_macros::define_syscall(SYS_UMASK)]
-async fn umask(mask: Mode) -> KResult<Mode> {
-    let mut umask = thread.fs_context.umask.lock();
+async fn umask(raw_new_mask: u32) -> KResult<u32> {
+    let new_mask = Permission::new(!raw_new_mask);
+    let old_mask = core::mem::replace(&mut *thread.fs_context.umask.lock(), new_mask);
 
-    Ok(core::mem::replace(&mut *umask, mask.non_format()))
+    Ok(!old_mask.bits())
 }
 
 #[eonix_macros::define_syscall(SYS_GETCWD)]
@@ -124,7 +125,7 @@ async fn chdir(path: User<u8>) -> KResult<()> {
     let path = UserString::new(path)?;
     let path = Path::new(path.as_cstr().to_bytes())?;
 
-    let dentry = Dentry::open(&thread.fs_context, path, true)?;
+    let dentry = Dentry::open(&thread.fs_context, path, true).await?;
     if !dentry.is_valid() {
         return Err(ENOENT);
     }
@@ -159,7 +160,8 @@ async fn mount(source: User<u8>, target: User<u8>, fstype: User<u8>, flags: usiz
         &thread.fs_context,
         Path::new(target.as_cstr().to_bytes())?,
         true,
-    )?;
+    )
+    .await?;
 
     if !mountpoint.is_valid() {
         return Err(ENOENT);
@@ -172,6 +174,7 @@ async fn mount(source: User<u8>, target: User<u8>, fstype: User<u8>, flags: usiz
         fstype.as_cstr().to_str().map_err(|_| EINVAL)?,
         flags as u64,
     )
+    .await
 }
 
 fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult<Vec<CString>> {
@@ -199,14 +202,15 @@ async fn execve(exec: User<u8>, argv: User<PtrT>, envp: User<PtrT>) -> KResult<S
     let argv = get_strings(UserPointer::new(argv)?)?;
     let envp = get_strings(UserPointer::new(envp)?)?;
 
-    let dentry = Dentry::open(&thread.fs_context, Path::new(exec.as_bytes())?, true)?;
+    let dentry = Dentry::open(&thread.fs_context, Path::new(exec.as_bytes())?, true).await?;
     if !dentry.is_valid() {
         Err(ENOENT)?;
     }
 
     // TODO: When `execve` is called by one of the threads in a process, the other threads
     //       should be terminated and `execve` is performed in the thread group leader.
-    let load_info = ProgramLoader::parse(&thread.fs_context, exec, dentry.clone(), argv, envp)?
+    let load_info = ProgramLoader::parse(&thread.fs_context, exec, dentry.clone(), argv, envp)
+        .await?
         .load()
         .await?;
 

+ 19 - 15
src/kernel/task/loader/elf.rs

@@ -193,9 +193,9 @@ impl<E: ElfArch> Elf<E> {
         Err(ENOEXEC)
     }
 
-    fn parse(elf_file: Arc<Dentry>) -> KResult<Self> {
+    async fn parse(elf_file: Arc<Dentry>) -> KResult<Self> {
         let mut elf_header = UninitBuffer::<ElfHeader<E::Ea>>::new();
-        elf_file.read(&mut elf_header, 0)?;
+        elf_file.read(&mut elf_header, 0).await?;
 
         let elf_header = elf_header.assume_init().map_err(|_| ENOEXEC)?;
 
@@ -203,10 +203,12 @@ impl<E: ElfArch> Elf<E> {
         let ph_count = elf_header.pt2.ph_count;
 
         let mut program_headers = vec![E::Ph::default(); ph_count as usize];
-        elf_file.read(
-            &mut ByteBuffer::from(program_headers.as_mut_slice()),
-            ph_offset.into_usize(),
-        )?;
+        elf_file
+            .read(
+                &mut ByteBuffer::from(program_headers.as_mut_slice()),
+                ph_offset.into_usize(),
+            )
+            .await?;
 
         Ok(Self {
             file: elf_file,
@@ -390,12 +392,13 @@ impl<E: ElfArch> Elf<E> {
     }
 
     async fn load_ldso(&self, mm_list: &MMList) -> KResult<Option<LdsoLoadInfo>> {
-        let ldso_path = self.ldso_path()?;
+        let ldso_path = self.ldso_path().await?;
 
         if let Some(ldso_path) = ldso_path {
             let fs_context = FsContext::global();
-            let ldso_file = Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true)?;
-            let ldso_elf = Elf::<E>::parse(ldso_file)?;
+            let ldso_file =
+                Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true).await?;
+            let ldso_elf = Elf::<E>::parse(ldso_file).await?;
 
             let base = VAddr::from(E::LDSO_BASE_ADDR);
 
@@ -420,7 +423,7 @@ impl<E: ElfArch> Elf<E> {
         mm_list.map_vdso().await
     }
 
-    fn ldso_path(&self) -> KResult<Option<String>> {
+    async fn ldso_path(&self) -> KResult<Option<String>> {
         for program_header in &self.program_headers {
             let type_ = program_header.type_().map_err(|_| ENOEXEC)?;
 
@@ -430,7 +433,8 @@ impl<E: ElfArch> Elf<E> {
 
                 let mut ldso_vec = vec![0u8; file_size - 1]; // -1 due to '\0'
                 self.file
-                    .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset)?;
+                    .read(&mut ByteBuffer::from(ldso_vec.as_mut_slice()), file_offset)
+                    .await?;
                 let ldso_path = String::from_utf8(ldso_vec).map_err(|_| ENOEXEC)?;
                 return Ok(Some(ldso_path));
             }
@@ -445,16 +449,16 @@ pub enum ELF {
 }
 
 impl ELF {
-    pub fn parse(elf_file: Arc<Dentry>) -> KResult<Self> {
+    pub async fn parse(elf_file: Arc<Dentry>) -> KResult<Self> {
         let mut header_pt1 = UninitBuffer::<HeaderPt1>::new();
-        elf_file.read(&mut header_pt1, 0)?;
+        elf_file.read(&mut header_pt1, 0).await?;
 
         let header_pt1 = header_pt1.assume_init().map_err(|_| ENOEXEC)?;
         assert_eq!(header_pt1.magic, ELF_MAGIC);
 
         match header_pt1.class() {
-            Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file)?)),
-            Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file)?)),
+            Class::ThirtyTwo => Ok(ELF::Elf32(Elf::parse(elf_file).await?)),
+            Class::SixtyFour => Ok(ELF::Elf64(Elf::parse(elf_file).await?)),
             _ => Err(ENOEXEC),
         }
     }

+ 8 - 5
src/kernel/task/loader/mod.rs

@@ -33,7 +33,7 @@ pub struct ProgramLoader {
 }
 
 impl ProgramLoader {
-    pub fn parse(
+    pub async fn parse(
         fs_context: &FsContext,
         mut exec_path: CString,
         mut file: Arc<Dentry>,
@@ -49,12 +49,15 @@ impl ProgramLoader {
             }
 
             let mut magic = [0; 4];
-            file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0)?;
+            file.read(&mut ByteBuffer::new(magic.as_mut_slice()), 0)
+                .await?;
 
             match magic {
                 [b'#', b'!', ..] => {
                     let mut interpreter_line = [0; 256];
-                    let nread = file.read(&mut ByteBuffer::new(&mut interpreter_line), 0)?;
+                    let nread = file
+                        .read(&mut ByteBuffer::new(&mut interpreter_line), 0)
+                        .await?;
 
                     // There is a tiny time gap between reading the magic number and
                     // reading the interpreter line, so we need to check if the line
@@ -77,7 +80,7 @@ impl ProgramLoader {
                     }
 
                     let path = Path::new(interpreter_name.as_bytes())?;
-                    file = Dentry::open(fs_context, path, true)?;
+                    file = Dentry::open(fs_context, path, true).await?;
 
                     args.insert(0, interpreter_name.clone());
                     if let Some(arg) = interpreter_arg {
@@ -92,7 +95,7 @@ impl ProgramLoader {
 
                     exec_path = interpreter_name;
                 }
-                ELF_MAGIC => break ELF::parse(file)?,
+                ELF_MAGIC => break ELF::parse(file).await?,
                 _ => return Err(ENOEXEC),
             }
 

+ 2 - 0
src/kernel/timer.rs

@@ -76,6 +76,8 @@ impl Ticks {
 }
 
 impl Instant {
+    pub const UNIX_EPOCH: Self = Self::default();
+
     pub const fn default() -> Self {
         Instant {
             secs_since_epoch: 0,

+ 150 - 145
src/kernel/vfs/dentry.rs

@@ -1,8 +1,9 @@
 pub mod dcache;
 
 use super::{
-    inode::{Ino, Inode, Mode, RenameData, WriteOffset},
-    DevId, FsContext,
+    inode::{Ino, Inode, InodeUse, RenameData, WriteOffset},
+    types::{DeviceId, Format, Mode, Permission},
+    FsContext,
 };
 use crate::{
     hash::KernelHasher,
@@ -14,22 +15,31 @@ use crate::{
 };
 use crate::{
     io::Stream,
-    kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE},
+    kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE},
 };
-use alloc::sync::{Arc, Weak};
+use alloc::sync::Arc;
 use core::{
     fmt,
+    future::Future,
     hash::{BuildHasher, BuildHasherDefault, Hasher},
-    ops::ControlFlow,
+    pin::Pin,
     sync::atomic::{AtomicPtr, AtomicU64, Ordering},
 };
 use eonix_sync::LazyLock;
 use pointers::BorrowedArc;
 use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX};
 
+#[derive(PartialEq, Eq)]
+enum DentryKind {
+    Regular,
+    Directory,
+    Symlink,
+    Mountpoint,
+}
+
 struct DentryData {
-    inode: Arc<dyn Inode>,
-    flags: u64,
+    inode: InodeUse<dyn Inode>,
+    kind: DentryKind,
 }
 
 /// # Safety
@@ -79,12 +89,6 @@ impl fmt::Debug for Dentry {
     }
 }
 
-const D_DIRECTORY: u64 = 1;
-#[allow(dead_code)]
-const D_MOUNTPOINT: u64 = 2;
-const D_SYMLINK: u64 = 4;
-const D_REGULAR: u64 = 8;
-
 impl RCUNode<Dentry> for Dentry {
     fn rcu_prev(&self) -> &AtomicPtr<Self> {
         &self.prev
@@ -116,11 +120,11 @@ impl Dentry {
         self.hash.store(hash, Ordering::Relaxed);
     }
 
-    fn find(self: &Arc<Self>, name: &[u8]) -> KResult<Arc<Self>> {
+    async fn find(self: &Arc<Self>, name: &[u8]) -> KResult<Arc<Self>> {
         let data = self.data.load();
         let data = data.as_ref().ok_or(ENOENT)?;
 
-        if data.flags & D_DIRECTORY == 0 {
+        if data.kind != DentryKind::Directory {
             return Err(ENOTDIR);
         }
 
@@ -141,7 +145,7 @@ impl Dentry {
                     return Ok(found);
                 }
 
-                dcache::d_try_revalidate(&dentry);
+                let _ = dcache::d_try_revalidate(&dentry).await;
                 dcache::d_add(dentry.clone());
 
                 Ok(dentry)
@@ -192,8 +196,8 @@ impl Dentry {
             .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent))
     }
 
-    fn save_data(&self, inode: Arc<dyn Inode>, flags: u64) -> KResult<()> {
-        let new = DentryData { inode, flags };
+    fn save(&self, inode: InodeUse<dyn Inode>, kind: DentryKind) {
+        let new = DentryData { inode, kind };
 
         // TODO!!!: We don't actually need to use `RCUPointer` here
         // Safety: this function may only be called from `create`-like functions which requires the
@@ -201,41 +205,35 @@ impl Dentry {
         // can't get a reference to the old data.
         let old = unsafe { self.data.swap(Some(Arc::new(new))) };
         assert!(old.is_none());
-
-        Ok(())
     }
 
-    pub fn save_reg(&self, file: Arc<dyn Inode>) -> KResult<()> {
-        self.save_data(file, D_REGULAR)
-    }
-
-    pub fn save_symlink(&self, link: Arc<dyn Inode>) -> KResult<()> {
-        self.save_data(link, D_SYMLINK)
+    pub fn fill(&self, file: InodeUse<dyn Inode>) {
+        match file.format() {
+            Format::REG | Format::BLK | Format::CHR => self.save(file, DentryKind::Regular),
+            Format::DIR => self.save(file, DentryKind::Directory),
+            Format::LNK => self.save(file, DentryKind::Symlink),
+        }
     }
 
-    pub fn save_dir(&self, dir: Arc<dyn Inode>) -> KResult<()> {
-        self.save_data(dir, D_DIRECTORY)
+    pub fn inode(&self) -> Option<InodeUse<dyn Inode>> {
+        self.data.load().as_ref().map(|data| data.inode.clone())
     }
 
-    pub fn get_inode(&self) -> KResult<Arc<dyn Inode>> {
-        self.data
-            .load()
-            .as_ref()
-            .ok_or(ENOENT)
-            .map(|data| data.inode.clone())
+    pub fn get_inode(&self) -> KResult<InodeUse<dyn Inode>> {
+        self.inode().ok_or(ENOENT)
     }
 
     pub fn is_directory(&self) -> bool {
         let data = self.data.load();
         data.as_ref()
-            .map_or(false, |data| data.flags & D_DIRECTORY != 0)
+            .map_or(false, |data| data.kind == DentryKind::Directory)
     }
 
     pub fn is_valid(&self) -> bool {
         self.data.load().is_some()
     }
 
-    pub fn open_check(self: &Arc<Self>, flags: OpenFlags, mode: Mode) -> KResult<()> {
+    pub async fn open_check(self: &Arc<Self>, flags: OpenFlags, perm: Permission) -> KResult<()> {
         let data = self.data.load();
 
         if data.is_some() {
@@ -250,7 +248,7 @@ impl Dentry {
             }
 
             let parent = self.parent().get_inode()?;
-            parent.creat(self, mode)
+            parent.create(self, perm).await
         }
     }
 }
@@ -260,110 +258,120 @@ impl Dentry {
         context: &FsContext,
         dentry: Arc<Self>,
         nrecur: u32,
-    ) -> KResult<Arc<Self>> {
-        if nrecur >= 16 {
-            return Err(ELOOP);
-        }
+    ) -> Pin<Box<impl Future<Output = KResult<Arc<Self>>> + use<'_>>> {
+        Box::pin(async move {
+            if nrecur >= 16 {
+                return Err(ELOOP);
+            }
 
-        let data = dentry.data.load();
-        let data = data.as_ref().ok_or(ENOENT)?;
+            let data = dentry.data.load();
+            let data = data.as_ref().ok_or(ENOENT)?;
 
-        match data.flags {
-            flags if flags & D_REGULAR != 0 => Err(ENOTDIR),
-            flags if flags & D_DIRECTORY != 0 => Ok(dentry),
-            flags if flags & D_SYMLINK != 0 => {
-                let mut buffer = [0u8; 256];
-                let mut buffer = ByteBuffer::new(&mut buffer);
+            match data.kind {
+                DentryKind::Regular => Err(ENOTDIR),
+                DentryKind::Directory => Ok(dentry),
+                DentryKind::Symlink => {
+                    let mut buffer = [0u8; 256];
+                    let mut buffer = ByteBuffer::new(&mut buffer);
 
-                data.inode.readlink(&mut buffer)?;
-                let path = Path::new(buffer.data())?;
+                    data.inode.readlink(&mut buffer).await?;
+                    let path = Path::new(buffer.data())?;
 
-                let dentry =
-                    Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1)?;
+                    let dentry =
+                        Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1)
+                            .await?;
 
-                Self::resolve_directory(context, dentry, nrecur + 1)
+                    Self::resolve_directory(context, dentry, nrecur + 1).await
+                }
+                _ => panic!("Invalid dentry flags"),
             }
-            _ => panic!("Invalid dentry flags"),
-        }
+        })
     }
 
-    pub fn open_recursive(
-        context: &FsContext,
-        cwd: &Arc<Self>,
-        path: Path,
+    pub fn open_recursive<'r, 'a: 'r, 'b: 'r, 'c: 'r>(
+        context: &'a FsContext,
+        cwd: &'b Arc<Self>,
+        path: Path<'c>,
         follow: bool,
         nrecur: u32,
-    ) -> KResult<Arc<Self>> {
-        // too many recursive search layers will cause stack overflow
-        // so we use 16 for now
-        if nrecur >= 16 {
-            return Err(ELOOP);
-        }
+    ) -> Pin<Box<impl Future<Output = KResult<Arc<Self>>> + 'r>> {
+        Box::pin(async move {
+            // too many recursive search layers will cause stack overflow
+            // so we use 16 for now
+            if nrecur >= 16 {
+                return Err(ELOOP);
+            }
 
-        let mut cwd = if path.is_absolute() {
-            context.fsroot.clone()
-        } else {
-            cwd.clone()
-        };
+            let mut cwd = if path.is_absolute() {
+                context.fsroot.clone()
+            } else {
+                cwd.clone()
+            };
 
-        for item in path.iter() {
-            if let PathComponent::TrailingEmpty = item {
-                if cwd.data.load().as_ref().is_none() {
-                    return Ok(cwd);
+            for item in path.iter() {
+                if let PathComponent::TrailingEmpty = item {
+                    if cwd.data.load().as_ref().is_none() {
+                        return Ok(cwd);
+                    }
                 }
-            }
 
-            cwd = Self::resolve_directory(context, cwd, nrecur)?;
+                cwd = Self::resolve_directory(context, cwd, nrecur).await?;
 
-            match item {
-                PathComponent::TrailingEmpty | PathComponent::Current => {} // pass
-                PathComponent::Parent => {
-                    if !cwd.hash_eq(&context.fsroot) {
-                        let parent = cwd.parent().clone();
-                        cwd = Self::resolve_directory(context, parent, nrecur)?;
+                match item {
+                    PathComponent::TrailingEmpty | PathComponent::Current => {} // pass
+                    PathComponent::Parent => {
+                        if !cwd.hash_eq(&context.fsroot) {
+                            let parent = cwd.parent().clone();
+                            cwd = Self::resolve_directory(context, parent, nrecur).await?;
+                        }
+                        continue;
+                    }
+                    PathComponent::Name(name) => {
+                        cwd = cwd.find(name).await?;
                     }
-                    continue;
-                }
-                PathComponent::Name(name) => {
-                    cwd = cwd.find(name)?;
                 }
             }
-        }
 
-        if follow {
-            let data = cwd.data.load();
+            if follow {
+                let data = cwd.data.load();
 
-            if let Some(data) = data.as_ref() {
-                if data.flags & D_SYMLINK != 0 {
-                    let data = cwd.data.load();
-                    let data = data.as_ref().unwrap();
-                    let mut buffer = [0u8; 256];
-                    let mut buffer = ByteBuffer::new(&mut buffer);
+                if let Some(data) = data.as_ref() {
+                    if data.kind == DentryKind::Symlink {
+                        let data = cwd.data.load();
+                        let data = data.as_ref().unwrap();
+                        let mut buffer = [0u8; 256];
+                        let mut buffer = ByteBuffer::new(&mut buffer);
 
-                    data.inode.readlink(&mut buffer)?;
-                    let path = Path::new(buffer.data())?;
+                        data.inode.readlink(&mut buffer).await?;
+                        let path = Path::new(buffer.data())?;
 
-                    let parent = cwd.parent().clone();
-                    cwd = Self::open_recursive(context, &parent, path, true, nrecur + 1)?;
+                        let parent = cwd.parent().clone();
+                        cwd =
+                            Self::open_recursive(context, &parent, path, true, nrecur + 1).await?;
+                    }
                 }
             }
-        }
 
-        Ok(cwd)
+            Ok(cwd)
+        })
     }
 
-    pub fn open(context: &FsContext, path: Path, follow_symlinks: bool) -> KResult<Arc<Self>> {
+    pub async fn open(
+        context: &FsContext,
+        path: Path<'_>,
+        follow_symlinks: bool,
+    ) -> KResult<Arc<Self>> {
         let cwd = context.cwd.lock().clone();
-        Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0)
+        Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0).await
     }
 
-    pub fn open_at(
+    pub async fn open_at(
         context: &FsContext,
         at: &Arc<Self>,
-        path: Path,
+        path: Path<'_>,
         follow_symlinks: bool,
     ) -> KResult<Arc<Self>> {
-        Dentry::open_recursive(context, at, path, follow_symlinks, 0)
+        Dentry::open_recursive(context, at, path, follow_symlinks, 0).await
     }
 
     pub fn get_path(
@@ -405,18 +413,18 @@ impl Dentry {
 }
 
 impl Dentry {
-    pub fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+    pub async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         let inode = self.get_inode()?;
 
         // Safety: Changing mode alone will have no effect on the file's contents
-        match inode.mode.load().format() {
-            Mode::DIR => Err(EISDIR),
-            Mode::REG => inode.read(buffer, offset),
-            Mode::BLK => {
+        match inode.format() {
+            Format::DIR => Err(EISDIR),
+            Format::REG => inode.read(buffer, offset).await,
+            Format::BLK => {
                 let device = BlockDevice::get(inode.devid()?)?;
-                Ok(device.read_some(offset, buffer)?.allow_partial())
+                Ok(device.read_some(offset, buffer).await?.allow_partial())
             }
-            Mode::CHR => {
+            Format::CHR => {
                 let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?;
                 device.read(buffer)
             }
@@ -424,32 +432,32 @@ impl Dentry {
         }
     }
 
-    pub fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+    pub async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
         let inode = self.get_inode()?;
         // Safety: Changing mode alone will have no effect on the file's contents
-        match inode.mode.load().format() {
-            Mode::DIR => Err(EISDIR),
-            Mode::REG => inode.write(stream, offset),
-            Mode::BLK => Err(EINVAL), // TODO
-            Mode::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream),
+        match inode.format() {
+            Format::DIR => Err(EISDIR),
+            Format::REG => inode.write(stream, offset).await,
+            Format::BLK => Err(EINVAL), // TODO
+            Format::CHR => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(stream),
             _ => Err(EINVAL),
         }
     }
 
-    pub fn readdir<F>(&self, offset: usize, mut callback: F) -> KResult<usize>
+    pub async fn readdir<F>(&self, offset: usize, mut for_each_entry: F) -> KResult<KResult<usize>>
     where
-        F: FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
+        F: FnMut(&[u8], Ino) -> KResult<bool> + Send,
     {
         let dir = self.get_inode()?;
-        dir.do_readdir(offset, &mut callback)
+        dir.readdir(offset, &mut for_each_entry).await
     }
 
-    pub fn mkdir(&self, mode: Mode) -> KResult<()> {
+    pub async fn mkdir(&self, perm: Permission) -> KResult<()> {
         if self.get_inode().is_ok() {
             Err(EEXIST)
         } else {
             let dir = self.parent().get_inode()?;
-            dir.mkdir(self, mode)
+            dir.mkdir(self, perm).await
         }
     }
 
@@ -457,50 +465,50 @@ impl Dentry {
         self.get_inode()?.statx(stat, mask)
     }
 
-    pub fn truncate(&self, size: usize) -> KResult<()> {
-        self.get_inode()?.truncate(size)
+    pub async fn truncate(&self, size: usize) -> KResult<()> {
+        self.get_inode()?.truncate(size).await
     }
 
-    pub fn unlink(self: &Arc<Self>) -> KResult<()> {
+    pub async fn unlink(self: &Arc<Self>) -> KResult<()> {
         if self.get_inode().is_err() {
             Err(ENOENT)
         } else {
             let dir = self.parent().get_inode()?;
-            dir.unlink(self)
+            dir.unlink(self).await
         }
     }
 
-    pub fn symlink(self: &Arc<Self>, link: &[u8]) -> KResult<()> {
+    pub async fn symlink(self: &Arc<Self>, link: &[u8]) -> KResult<()> {
         if self.get_inode().is_ok() {
             Err(EEXIST)
         } else {
             let dir = self.parent().get_inode()?;
-            dir.symlink(self, link)
+            dir.symlink(self, link).await
         }
     }
 
-    pub fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
-        self.get_inode()?.readlink(buffer)
+    pub async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+        self.get_inode()?.readlink(buffer).await
     }
 
-    pub fn mknod(&self, mode: Mode, devid: DevId) -> KResult<()> {
+    pub async fn mknod(&self, mode: Mode, devid: DeviceId) -> KResult<()> {
         if self.get_inode().is_ok() {
             Err(EEXIST)
         } else {
             let dir = self.parent().get_inode()?;
-            dir.mknod(self, mode, devid)
+            dir.mknod(self, mode, devid).await
         }
     }
 
-    pub fn chmod(&self, mode: Mode) -> KResult<()> {
-        self.get_inode()?.chmod(mode)
+    pub async fn chmod(&self, mode: Mode) -> KResult<()> {
+        self.get_inode()?.chmod(mode).await
     }
 
-    pub fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
-        self.get_inode()?.chown(uid, gid)
+    pub async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        self.get_inode()?.chown(uid, gid).await
     }
 
-    pub fn rename(self: &Arc<Self>, new: &Arc<Self>, flags: RenameFlags) -> KResult<()> {
+    pub async fn rename(self: &Arc<Self>, new: &Arc<Self>, flags: RenameFlags) -> KResult<()> {
         if Arc::ptr_eq(self, new) {
             return Ok(());
         }
@@ -509,22 +517,19 @@ impl Dentry {
         let new_parent = new.parent().get_inode()?;
 
         // If the two dentries are not in the same filesystem, return EXDEV.
-        if !Weak::ptr_eq(&old_parent.vfs, &new_parent.vfs) {
+        if !old_parent.sbref().eq(&new_parent.sbref()) {
             Err(PosixError::EXDEV)?;
         }
 
-        let vfs = old_parent.vfs.upgrade().ok_or(EIO)?;
-
         let rename_data = RenameData {
             old_dentry: self,
             new_dentry: new,
             new_parent,
-            vfs,
             is_exchange: flags.contains(RenameFlags::RENAME_EXCHANGE),
             no_replace: flags.contains(RenameFlags::RENAME_NOREPLACE),
         };
 
         // Delegate to the parent directory's rename implementation
-        old_parent.rename(rename_data)
+        old_parent.rename(rename_data).await
     }
 }

+ 7 - 22
src/kernel/vfs/dentry/dcache.rs

@@ -1,7 +1,5 @@
-use super::{Dentry, Inode};
+use super::Dentry;
 use crate::kernel::constants::ENOENT;
-use crate::kernel::task::block_on;
-use crate::kernel::vfs::inode::Mode;
 use crate::rcu::RCUPointer;
 use crate::{
     prelude::*,
@@ -41,27 +39,14 @@ pub fn d_find_fast(dentry: &Dentry) -> Option<Arc<Dentry>> {
 /// Call `lookup()` on the parent inode to try find if the dentry points to a valid inode
 ///
 /// Silently fail without any side effects
-pub fn d_try_revalidate(dentry: &Arc<Dentry>) {
-    let _lock = block_on(D_EXCHANGE_LOCK.lock());
-
-    (|| -> KResult<()> {
-        let parent = dentry.parent().get_inode()?;
-        let inode = parent.lookup(dentry)?.ok_or(ENOENT)?;
+pub async fn d_try_revalidate(dentry: &Arc<Dentry>) -> KResult<()> {
+    let _lock = D_EXCHANGE_LOCK.lock().await;
 
-        d_save(dentry, inode)
-    })()
-    .unwrap_or_default();
-}
+    let parent = dentry.parent().get_inode()?;
+    let inode = parent.lookup(dentry).await?.ok_or(ENOENT)?;
 
-/// Save the inode to the dentry.
-///
-/// Dentry flags will be determined by the inode's mode.
-pub fn d_save(dentry: &Arc<Dentry>, inode: Arc<dyn Inode>) -> KResult<()> {
-    match inode.mode.load().format() {
-        Mode::DIR => dentry.save_dir(inode),
-        Mode::LNK => dentry.save_symlink(inode),
-        _ => dentry.save_reg(inode),
-    }
+    dentry.fill(inode);
+    Ok(())
 }
 
 /// Replace the old dentry with the new one in the dcache

+ 67 - 72
src/kernel/vfs/file/inode_file.rs

@@ -5,13 +5,13 @@ use crate::{
         constants::{EBADF, EFAULT, ENOTDIR, EOVERFLOW, ESPIPE},
         vfs::{
             dentry::Dentry,
-            inode::{Inode, Mode, WriteOffset},
+            inode::{Inode, InodeUse, WriteOffset},
+            types::Format,
         },
     },
     prelude::KResult,
 };
 use alloc::sync::Arc;
-use core::{ops::ControlFlow, sync::atomic::Ordering};
 use eonix_sync::Mutex;
 use posix_types::{
     getdent::{UserDirent, UserDirent64},
@@ -25,7 +25,7 @@ pub struct InodeFile {
     pub a: bool,
     /// Only a few modes those won't possibly change are cached here to speed up file operations.
     /// Specifically, `S_IFMT` masked bits.
-    pub mode: Mode,
+    pub format: Format,
     cursor: Mutex<usize>,
     dentry: Arc<Dentry>,
 }
@@ -34,12 +34,7 @@ impl InodeFile {
     pub fn new(dentry: Arc<Dentry>, flags: OpenFlags) -> File {
         // SAFETY: `dentry` used to create `InodeFile` is valid.
         // SAFETY: `mode` should never change with respect to the `S_IFMT` fields.
-        let cached_mode = dentry
-            .get_inode()
-            .expect("`dentry` is invalid")
-            .mode
-            .load()
-            .format();
+        let format = dentry.inode().expect("dentry should be valid").format();
 
         let (r, w, a) = flags.as_rwa();
 
@@ -50,15 +45,15 @@ impl InodeFile {
                 r,
                 w,
                 a,
-                mode: cached_mode,
+                format,
                 cursor: Mutex::new(0),
             }),
         )
     }
 
     pub fn sendfile_check(&self) -> KResult<()> {
-        match self.mode {
-            Mode::REG | Mode::BLK => Ok(()),
+        match self.format {
+            Format::REG | Format::BLK => Ok(()),
             _ => Err(EBADF),
         }
     }
@@ -70,21 +65,19 @@ impl InodeFile {
 
         let mut cursor = self.cursor.lock().await;
 
-        if self.a {
-            let nwrote = self.dentry.write(stream, WriteOffset::End(&mut cursor))?;
+        let (offset, update_offset) = match (self.a, offset) {
+            (true, _) => (WriteOffset::End(&mut cursor), None),
+            (false, Some(offset)) => (WriteOffset::Position(offset), None),
+            (false, None) => (WriteOffset::Position(*cursor), Some(&mut *cursor)),
+        };
 
-            Ok(nwrote)
-        } else {
-            let nwrote = if let Some(offset) = offset {
-                self.dentry.write(stream, WriteOffset::Position(offset))?
-            } else {
-                let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?;
-                *cursor += nwrote;
-                nwrote
-            };
-
-            Ok(nwrote)
+        let nr_write = self.dentry.write(stream, offset).await?;
+
+        if let Some(update_offset) = update_offset {
+            *update_offset += nr_write;
         }
+
+        Ok(nr_write)
     }
 
     pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option<usize>) -> KResult<usize> {
@@ -92,24 +85,20 @@ impl InodeFile {
             return Err(EBADF);
         }
 
-        let nread = if let Some(offset) = offset {
-            let nread = self.dentry.read(buffer, offset)?;
-            nread
-        } else {
-            let mut cursor = self.cursor.lock().await;
-
-            let nread = self.dentry.read(buffer, *cursor)?;
+        if let Some(offset) = offset {
+            return Ok(self.dentry.read(buffer, offset).await?);
+        }
 
-            *cursor += nread;
-            nread
-        };
+        let mut cursor = self.cursor.lock().await;
+        let nread = self.dentry.read(buffer, *cursor).await?;
 
+        *cursor += nread;
         Ok(nread)
     }
 }
 
 impl File {
-    pub fn get_inode(&self) -> KResult<Option<Arc<dyn Inode>>> {
+    pub fn get_inode(&self) -> KResult<Option<InodeUse<dyn Inode>>> {
         if let FileType::Inode(inode_file) = &**self {
             Ok(Some(inode_file.dentry.get_inode()?))
         } else {
@@ -124,27 +113,30 @@ impl File {
 
         let mut cursor = inode_file.cursor.lock().await;
 
-        let nread = inode_file.dentry.readdir(*cursor, |filename, ino| {
-            // + 1 for filename length padding '\0', + 1 for d_type.
-            let real_record_len = core::mem::size_of::<UserDirent>() + filename.len() + 2;
+        let nread = inode_file
+            .dentry
+            .readdir(*cursor, |filename, ino| {
+                // + 1 for filename length padding '\0', + 1 for d_type.
+                let real_record_len = core::mem::size_of::<UserDirent>() + filename.len() + 2;
 
-            if buffer.available() < real_record_len {
-                return Ok(ControlFlow::Break(()));
-            }
+                if buffer.available() < real_record_len {
+                    return Ok(false);
+                }
 
-            let record = UserDirent {
-                d_ino: ino as u32,
-                d_off: 0,
-                d_reclen: real_record_len as u16,
-                d_name: [0; 0],
-            };
+                let record = UserDirent {
+                    d_ino: ino.as_raw() as u32,
+                    d_off: 0,
+                    d_reclen: real_record_len as u16,
+                    d_name: [0; 0],
+                };
 
-            buffer.copy(&record)?.ok_or(EFAULT)?;
-            buffer.fill(filename)?.ok_or(EFAULT)?;
-            buffer.fill(&[0, 0])?.ok_or(EFAULT)?;
+                buffer.copy(&record)?.ok_or(EFAULT)?;
+                buffer.fill(filename)?.ok_or(EFAULT)?;
+                buffer.fill(&[0, 0])?.ok_or(EFAULT)?;
 
-            Ok(ControlFlow::Continue(()))
-        })?;
+                Ok(true)
+            })
+            .await??;
 
         *cursor += nread;
         Ok(())
@@ -157,28 +149,31 @@ impl File {
 
         let mut cursor = inode_file.cursor.lock().await;
 
-        let nread = inode_file.dentry.readdir(*cursor, |filename, ino| {
-            // Filename length + 1 for padding '\0'
-            let real_record_len = core::mem::size_of::<UserDirent64>() + filename.len() + 1;
+        let nread = inode_file
+            .dentry
+            .readdir(*cursor, |filename, ino| {
+                // Filename length + 1 for padding '\0'
+                let real_record_len = core::mem::size_of::<UserDirent64>() + filename.len() + 1;
 
-            if buffer.available() < real_record_len {
-                return Ok(ControlFlow::Break(()));
-            }
+                if buffer.available() < real_record_len {
+                    return Ok(false);
+                }
 
-            let record = UserDirent64 {
-                d_ino: ino,
-                d_off: 0,
-                d_reclen: real_record_len as u16,
-                d_type: 0,
-                d_name: [0; 0],
-            };
+                let record = UserDirent64 {
+                    d_ino: ino.as_raw(),
+                    d_off: 0,
+                    d_reclen: real_record_len as u16,
+                    d_type: 0,
+                    d_name: [0; 0],
+                };
 
-            buffer.copy(&record)?.ok_or(EFAULT)?;
-            buffer.fill(filename)?.ok_or(EFAULT)?;
-            buffer.fill(&[0])?.ok_or(EFAULT)?;
+                buffer.copy(&record)?.ok_or(EFAULT)?;
+                buffer.fill(filename)?.ok_or(EFAULT)?;
+                buffer.fill(&[0])?.ok_or(EFAULT)?;
 
-            Ok(ControlFlow::Continue(()))
-        })?;
+                Ok(true)
+            })
+            .await??;
 
         *cursor += nread;
         Ok(())
@@ -196,7 +191,7 @@ impl File {
             SeekOption::Set(n) => n,
             SeekOption::End(off) => {
                 let inode = inode_file.dentry.get_inode()?;
-                let size = inode.size.load(Ordering::Relaxed) as usize;
+                let size = inode.info().lock().size as usize;
                 size.checked_add_signed(off).ok_or(EOVERFLOW)?
             }
         };

+ 14 - 9
src/kernel/vfs/filearray.rs

@@ -1,6 +1,6 @@
 use super::{
     file::{File, InodeFile, Pipe},
-    inode::Mode,
+    types::{Format, Permission},
     Spin, TerminalFile,
 };
 use crate::kernel::{
@@ -280,26 +280,31 @@ impl FileArray {
         Ok((read_fd, write_fd))
     }
 
-    pub fn open(&self, dentry: &Arc<Dentry>, flags: OpenFlags, mode: Mode) -> KResult<FD> {
-        dentry.open_check(flags, mode)?;
+    pub async fn open(
+        &self,
+        dentry: &Arc<Dentry>,
+        flags: OpenFlags,
+        perm: Permission,
+    ) -> KResult<FD> {
+        dentry.open_check(flags, perm).await?;
 
         let fdflag = flags.as_fd_flags();
 
         let inode = dentry.get_inode()?;
-        let file_format = inode.mode.load().format();
+        let file_format = inode.format();
 
         match (flags.directory(), file_format, flags.write()) {
-            (true, Mode::DIR, _) => {}
+            (true, Format::DIR, _) => {}
             (true, _, _) => return Err(ENOTDIR),
-            (false, Mode::DIR, true) => return Err(EISDIR),
+            (false, Format::DIR, true) => return Err(EISDIR),
             _ => {}
         }
 
-        if flags.truncate() && flags.write() && file_format.is_reg() {
-            inode.truncate(0)?;
+        if flags.truncate() && flags.write() && file_format == Format::REG {
+            inode.truncate(0).await?;
         }
 
-        let file = if file_format.is_chr() {
+        let file = if file_format == Format::CHR {
             let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?;
             device.open(flags)?
         } else {

+ 0 - 494
src/kernel/vfs/inode.rs

@@ -1,494 +0,0 @@
-use super::{dentry::Dentry, vfs::Vfs, DevId};
-use crate::io::Stream;
-use crate::kernel::constants::{
-    EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO,
-    STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFBLK, S_IFCHR,
-    S_IFDIR, S_IFLNK, S_IFMT, S_IFREG,
-};
-use crate::kernel::mem::PageCache;
-use crate::kernel::syscall::{FromSyscallArg, SyscallRetVal};
-use crate::kernel::task::block_on;
-use crate::kernel::timer::Instant;
-use crate::{io::Buffer, prelude::*};
-use alloc::sync::{Arc, Weak};
-use core::{
-    mem::MaybeUninit,
-    ops::ControlFlow,
-    ptr::addr_of_mut,
-    sync::atomic::{AtomicU32, AtomicU64, Ordering},
-};
-use eonix_sync::RwLock;
-use posix_types::stat::StatX;
-
-pub type Ino = u64;
-pub type AtomicIno = AtomicU64;
-#[allow(dead_code)]
-pub type ISize = u64;
-pub type AtomicISize = AtomicU64;
-#[allow(dead_code)]
-pub type Nlink = u64;
-pub type AtomicNlink = AtomicU64;
-#[allow(dead_code)]
-pub type Uid = u32;
-pub type AtomicUid = AtomicU32;
-#[allow(dead_code)]
-pub type Gid = u32;
-pub type AtomicGid = AtomicU32;
-
-#[derive(Clone, Copy, PartialEq, Eq)]
-pub struct Mode(u32);
-
-pub struct AtomicMode(AtomicU32);
-
-#[derive(Debug)]
-pub struct InodeData {
-    pub ino: Ino,
-    pub size: AtomicISize,
-    pub nlink: AtomicNlink,
-
-    pub uid: AtomicUid,
-    pub gid: AtomicGid,
-    pub mode: AtomicMode,
-
-    pub atime: Spin<Instant>,
-    pub ctime: Spin<Instant>,
-    pub mtime: Spin<Instant>,
-
-    pub rwsem: RwLock<()>,
-
-    pub vfs: Weak<dyn Vfs>,
-}
-
-impl InodeData {
-    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
-        Self {
-            ino,
-            vfs,
-            atime: Spin::new(Instant::now()),
-            ctime: Spin::new(Instant::now()),
-            mtime: Spin::new(Instant::now()),
-            rwsem: RwLock::new(()),
-            size: AtomicU64::new(0),
-            nlink: AtomicNlink::new(0),
-            uid: AtomicUid::new(0),
-            gid: AtomicGid::new(0),
-            mode: AtomicMode::new(0),
-        }
-    }
-}
-
-#[allow(dead_code)]
-pub trait InodeInner:
-    Send + Sync + core::ops::Deref<Target = InodeData> + core::ops::DerefMut
-{
-    fn data(&self) -> &InodeData;
-    fn data_mut(&mut self) -> &mut InodeData;
-}
-
-pub enum WriteOffset<'end> {
-    Position(usize),
-    End(&'end mut usize),
-}
-
-pub struct RenameData<'a, 'b> {
-    pub old_dentry: &'a Arc<Dentry>,
-    pub new_dentry: &'b Arc<Dentry>,
-    pub new_parent: Arc<dyn Inode>,
-    pub vfs: Arc<dyn Vfs>,
-    pub is_exchange: bool,
-    pub no_replace: bool,
-}
-
-#[allow(unused_variables)]
-pub trait Inode: Send + Sync + InodeInner + Any {
-    fn is_dir(&self) -> bool {
-        self.mode.load().is_dir()
-    }
-
-    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn mknod(&self, at: &Dentry, mode: Mode, dev: DevId) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn devid(&self) -> KResult<DevId> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
-        Err(if self.is_dir() { EISDIR } else { EINVAL })
-    }
-
-    fn truncate(&self, length: usize) -> KResult<()> {
-        Err(if self.is_dir() { EISDIR } else { EPERM })
-    }
-
-    fn rename(&self, rename_data: RenameData) -> KResult<()> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn do_readdir(
-        &self,
-        offset: usize,
-        callback: &mut dyn FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
-    ) -> KResult<usize> {
-        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
-    }
-
-    fn chmod(&self, mode: Mode) -> KResult<()> {
-        Err(EPERM)
-    }
-
-    fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
-        Err(EPERM)
-    }
-
-    fn page_cache(&self) -> Option<&PageCache> {
-        None
-    }
-
-    fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
-        // Safety: ffi should have checked reference
-        let vfs = self.vfs.upgrade().expect("Vfs is dropped");
-
-        let size = self.size.load(Ordering::Relaxed);
-        let mode = self.mode.load();
-
-        if mask & STATX_NLINK != 0 {
-            stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _;
-            stat.stx_mask |= STATX_NLINK;
-        }
-
-        if mask & STATX_ATIME != 0 {
-            let atime = *self.atime.lock();
-            stat.stx_atime = atime.into();
-            stat.stx_mask |= STATX_ATIME;
-        }
-
-        if mask & STATX_MTIME != 0 {
-            let mtime = *self.mtime.lock();
-            stat.stx_mtime = mtime.into();
-            stat.stx_mask |= STATX_MTIME;
-        }
-
-        if mask & STATX_CTIME != 0 {
-            let ctime = *self.ctime.lock();
-            stat.stx_ctime = ctime.into();
-            stat.stx_mask |= STATX_CTIME;
-        }
-
-        if mask & STATX_SIZE != 0 {
-            stat.stx_size = self.size.load(Ordering::Relaxed) as _;
-            stat.stx_mask |= STATX_SIZE;
-        }
-
-        stat.stx_mode = 0;
-        if mask & STATX_MODE != 0 {
-            stat.stx_mode |= mode.non_format_bits() as u16;
-            stat.stx_mask |= STATX_MODE;
-        }
-
-        if mask & STATX_TYPE != 0 {
-            stat.stx_mode |= mode.format_bits() as u16;
-            if mode.is_blk() || mode.is_chr() {
-                let devid = self.devid();
-                stat.stx_rdev_major = (devid? >> 8) & 0xff;
-                stat.stx_rdev_minor = devid? & 0xff;
-            }
-            stat.stx_mask |= STATX_TYPE;
-        }
-
-        if mask & STATX_INO != 0 {
-            stat.stx_ino = self.ino as _;
-            stat.stx_mask |= STATX_INO;
-        }
-
-        if mask & STATX_BLOCKS != 0 {
-            stat.stx_blocks = (size + 512 - 1) / 512;
-            stat.stx_blksize = vfs.io_blksize() as _;
-            stat.stx_mask |= STATX_BLOCKS;
-        }
-
-        if mask & STATX_UID != 0 {
-            stat.stx_uid = self.uid.load(Ordering::Relaxed) as _;
-            stat.stx_mask |= STATX_UID;
-        }
-
-        if mask & STATX_GID != 0 {
-            stat.stx_gid = self.gid.load(Ordering::Relaxed) as _;
-            stat.stx_mask |= STATX_GID;
-        }
-
-        let fsdev = vfs.fs_devid();
-        stat.stx_dev_major = (fsdev >> 8) & 0xff;
-        stat.stx_dev_minor = fsdev & 0xff;
-
-        // TODO: support more attributes
-        stat.stx_attributes_mask = 0;
-
-        Ok(())
-    }
-
-    fn new_locked<F>(ino: Ino, vfs: Weak<dyn Vfs>, f: F) -> Arc<Self>
-    where
-        Self: Sized,
-        F: FnOnce(*mut Self, &()),
-    {
-        let mut uninit = Arc::<Self>::new_uninit();
-
-        let uninit_mut = Arc::get_mut(&mut uninit).unwrap();
-
-        // Safety: `idata` is owned by `uninit`
-        let idata = unsafe {
-            addr_of_mut!(*(*uninit_mut.as_mut_ptr()).data_mut())
-                .cast::<MaybeUninit<InodeData>>()
-                .as_mut()
-                .unwrap()
-        };
-
-        idata.write(InodeData::new(ino, vfs));
-
-        f(
-            uninit_mut.as_mut_ptr(),
-            // SAFETY: `idata` is initialized and we will never move the lock.
-            &block_on(unsafe { idata.assume_init_ref() }.rwsem.read()),
-        );
-
-        // Safety: `uninit` is initialized
-        unsafe { uninit.assume_init() }
-    }
-}
-
-// TODO: define multiple inode structs a time
-macro_rules! define_struct_inode {
-    ($v:vis struct $inode_t:ident;) => {
-        $v struct $inode_t {
-            /// Do not use this directly
-            idata: $crate::kernel::vfs::inode::InodeData,
-        }
-
-        impl core::ops::Deref for $inode_t {
-            type Target = $crate::kernel::vfs::inode::InodeData;
-
-            fn deref(&self) -> &Self::Target {
-                &self.idata
-            }
-        }
-
-        impl core::ops::DerefMut for $inode_t {
-            fn deref_mut(&mut self) -> &mut Self::Target {
-                &mut self.idata
-            }
-        }
-
-        impl $crate::kernel::vfs::inode::InodeInner for $inode_t {
-            fn data(&self) -> &$crate::kernel::vfs::inode::InodeData {
-                &self.idata
-            }
-
-            fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData {
-                &mut self.idata
-            }
-        }
-    };
-    ($v:vis struct $inode_t:ident { $($vis:vis $name:ident: $type:ty,)* }) => {
-        $v struct $inode_t {
-            /// Do not use this directly
-            idata: $crate::kernel::vfs::inode::InodeData,
-            $($vis $name: $type,)*
-        }
-
-        impl core::ops::Deref for $inode_t {
-            type Target = $crate::kernel::vfs::inode::InodeData;
-
-            fn deref(&self) -> &Self::Target {
-                &self.idata
-            }
-        }
-
-        impl core::ops::DerefMut for $inode_t {
-            fn deref_mut(&mut self) -> &mut Self::Target {
-                &mut self.idata
-            }
-        }
-
-        impl $crate::kernel::vfs::inode::InodeInner for $inode_t {
-            fn data(&self) -> &$crate::kernel::vfs::inode::InodeData {
-                &self.idata
-            }
-
-            fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData {
-                &mut self.idata
-            }
-        }
-    };
-}
-
-pub(crate) use define_struct_inode;
-
-impl Mode {
-    pub const REG: Self = Self(S_IFREG);
-    pub const DIR: Self = Self(S_IFDIR);
-    pub const LNK: Self = Self(S_IFLNK);
-    pub const BLK: Self = Self(S_IFBLK);
-    pub const CHR: Self = Self(S_IFCHR);
-
-    pub const fn new(bits: u32) -> Self {
-        Self(bits)
-    }
-
-    pub const fn is_blk(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFBLK
-    }
-
-    pub const fn is_chr(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFCHR
-    }
-
-    pub const fn is_reg(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFREG
-    }
-
-    pub const fn is_dir(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFDIR
-    }
-
-    pub const fn is_lnk(&self) -> bool {
-        (self.0 & S_IFMT) == S_IFLNK
-    }
-
-    pub const fn bits(&self) -> u32 {
-        self.0
-    }
-
-    pub const fn format_bits(&self) -> u32 {
-        self.0 & S_IFMT
-    }
-
-    pub const fn format(&self) -> Self {
-        Self::new(self.format_bits())
-    }
-
-    pub const fn non_format_bits(&self) -> u32 {
-        self.0 & !S_IFMT
-    }
-
-    pub const fn non_format(&self) -> Self {
-        Self::new(self.non_format_bits())
-    }
-
-    pub const fn perm(self, perm: u32) -> Self {
-        Self::new((self.0 & !0o777) | (perm & 0o777))
-    }
-
-    pub const fn set_perm(&mut self, perm: u32) {
-        *self = self.perm(perm);
-    }
-
-    pub const fn mask_perm(&mut self, perm_mask: u32) {
-        let perm_mask = perm_mask & 0o777;
-        let self_perm = self.non_format_bits() & 0o777;
-
-        *self = self.perm(self_perm & perm_mask);
-    }
-}
-
-impl AtomicMode {
-    pub const fn new(bits: u32) -> Self {
-        Self(AtomicU32::new(bits))
-    }
-
-    pub const fn from(mode: Mode) -> Self {
-        Self::new(mode.0)
-    }
-
-    pub fn load(&self) -> Mode {
-        Mode(self.0.load(Ordering::Relaxed))
-    }
-
-    pub fn store(&self, mode: Mode) {
-        self.0.store(mode.0, Ordering::Relaxed);
-    }
-}
-
-impl core::fmt::Debug for AtomicMode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_struct("AtomicMode")
-            .field("bits", &self.load().0)
-            .finish()
-    }
-}
-
-impl core::fmt::Debug for Mode {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let format_name = match self.format() {
-            Mode::REG => "REG",
-            Mode::DIR => "DIR",
-            Mode::LNK => "LNK",
-            Mode::BLK => "BLK",
-            Mode::CHR => "CHR",
-            _ => "UNK",
-        };
-
-        match self.non_format_bits() & !0o777 {
-            0 => write!(
-                f,
-                "Mode({format_name}, {perm:#o})",
-                perm = self.non_format_bits()
-            )?,
-            rem => write!(
-                f,
-                "Mode({format_name}, {perm:#o}, rem={rem:#x})",
-                perm = self.non_format_bits() & 0o777
-            )?,
-        }
-
-        Ok(())
-    }
-}
-
-impl FromSyscallArg for Mode {
-    fn from_arg(value: usize) -> Self {
-        Mode::new(value as u32)
-    }
-}
-
-impl SyscallRetVal for Mode {
-    fn into_retval(self) -> Option<usize> {
-        Some(self.bits() as usize)
-    }
-}

+ 31 - 0
src/kernel/vfs/inode/ino.rs

@@ -0,0 +1,31 @@
+use core::{
+    fmt::{Debug, Display, Formatter},
+    sync::atomic::AtomicU64,
+};
+
+/// Inode number: a newtype wrapper around a raw `u64`.
+///
+/// Construct with [`Ino::new`] and unwrap with [`Ino::as_raw`].
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Ino(u64);
+
+/// Newtype wrapper around `AtomicU64`, named as the atomic counterpart of [`Ino`].
+///
+/// NOTE(review): no constructor or accessor is defined for this type in this
+/// file, and the module's `pub use` only re-exports `Ino` -- confirm whether it
+/// is still needed.
+pub struct AtomicIno(AtomicU64);
+
+impl Ino {
+    /// Wraps a raw `u64` inode number.
+    pub const fn new(ino: u64) -> Self {
+        Self(ino)
+    }
+
+    /// Returns the wrapped raw `u64` inode number.
+    pub const fn as_raw(self) -> u64 {
+        self.0
+    }
+}
+
+/// Formats as `Ino(<number>)`, e.g. `Ino(42)`.
+impl Debug for Ino {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "Ino({})", self.0)
+    }
+}
+
+/// Produces the same text as the `Debug` impl (`Ino(<number>)`), not the bare
+/// number, because it formats `self` with `{:?}`.
+impl Display for Ino {
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}

+ 389 - 0
src/kernel/vfs/inode/inode.rs

@@ -0,0 +1,389 @@
+use alloc::boxed::Box;
+use core::{
+    any::Any,
+    future::Future,
+    marker::Unsize,
+    ops::{CoerceUnsized, Deref},
+    pin::Pin,
+};
+use eonix_sync::Spin;
+
+use alloc::sync::{Arc, Weak};
+use async_trait::async_trait;
+
+use crate::{
+    io::{Buffer, Stream},
+    kernel::{
+        constants::{EINVAL, EPERM},
+        mem::PageCache,
+        timer::Instant,
+        vfs::{
+            dentry::Dentry,
+            types::{DeviceId, Format, Mode, Permission},
+            SbRef, SbUse, SuperBlock,
+        },
+    },
+    prelude::KResult,
+};
+
+use super::{Ino, RenameData, WriteOffset};
+
+/// Generic attribute accessors that every concrete inode type provides.
+///
+/// Implementors are `Sized`; the type-erased counterpart is [`Inode`], which
+/// is implemented for any type that implements this trait together with
+/// [`InodeFile`] and [`InodeDir`].
+pub trait InodeOps: Sized + Send + Sync + 'static {
+    /// Concrete superblock type this inode belongs to.
+    type SuperBlock: SuperBlock + Sized;
+
+    /// Inode number of this inode.
+    fn ino(&self) -> Ino;
+    /// File format (type) of this inode, e.g. `Format::REG` or `Format::DIR`.
+    fn format(&self) -> Format;
+    /// Spinlock-protected mutable attributes (size, link count, owner, times).
+    fn info(&self) -> &Spin<InodeInfo>;
+
+    /// Weak reference to the owning superblock.
+    fn super_block(&self) -> &SbRef<Self::SuperBlock>;
+
+    /// Page cache of this inode, or `None` if it has none.
+    fn page_cache(&self) -> Option<&PageCache>;
+}
+
+/// Directory operations of a concrete inode type.
+///
+/// Every method has a default body that resolves to `Err(EPERM)`, so an
+/// implementor only overrides the operations it supports. All returned futures
+/// are required to be `Send`.
+// The default bodies ignore their parameters, hence the lint allowance.
+#[allow(unused_variables)]
+pub trait InodeDirOps: InodeOps {
+    /// Looks up the inode for `dentry` in this directory.
+    ///
+    /// Default: `Err(EPERM)`.
+    fn lookup(
+        &self,
+        dentry: &Arc<Dentry>,
+    ) -> impl Future<Output = KResult<Option<InodeUse<dyn Inode>>>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Read directory entries and call the given closure for each entry.
+    ///
+    /// `offset` is the position to start reading from; the closure receives
+    /// each entry as a byte slice together with its [`Ino`].
+    /// NOTE(review): the meaning of the closure's `bool` result (presumably
+    /// "continue iterating") is not visible here -- confirm with implementors.
+    ///
+    /// # Returns
+    /// - Ok(Ok(count)): The number of entries read.
+    /// - Ok(Err(err)): Some error occurred while calling the given closure.
+    /// - Err(err): An error occurred while reading the directory.
+    ///
+    /// Default: `Err(EPERM)`.
+    fn readdir<'r, 'a: 'r, 'b: 'r>(
+        &'a self,
+        offset: usize,
+        for_each_entry: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> impl Future<Output = KResult<KResult<usize>>> + Send + 'r {
+        async { Err(EPERM) }
+    }
+
+    /// Creates a file at `at` with permission `mode`. Default: `Err(EPERM)`.
+    fn create(
+        &self,
+        at: &Arc<Dentry>,
+        mode: Permission,
+    ) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Creates a directory at `at` with permission `mode`. Default: `Err(EPERM)`.
+    fn mkdir(&self, at: &Dentry, mode: Permission) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Creates a node at `at` with the given `mode` and device id `dev`.
+    /// Default: `Err(EPERM)`.
+    fn mknod(
+        &self,
+        at: &Dentry,
+        mode: Mode,
+        dev: DeviceId,
+    ) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Removes the entry `at` from this directory. Default: `Err(EPERM)`.
+    fn unlink(&self, at: &Arc<Dentry>) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Creates a symbolic link at `at` pointing to `target`. Default: `Err(EPERM)`.
+    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Performs the rename described by `rename_data`. Default: `Err(EPERM)`.
+    fn rename(&self, rename_data: RenameData<'_, '_>) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+}
+
+/// File operations of a concrete inode type.
+///
+/// Every method has a default body, so an implementor only overrides what it
+/// supports. The data-path defaults (`read`, `read_direct`, `write`,
+/// `write_direct`, `readlink`) and `devid` yield `Err(EINVAL)`; the attribute
+/// defaults (`truncate`, `chmod`, `chown`) yield `Err(EPERM)`. All returned
+/// futures are required to be `Send`.
+// The default bodies ignore their parameters, hence the lint allowance.
+#[allow(unused_variables)]
+pub trait InodeFileOps: InodeOps {
+    /// Reads into `buffer` starting at `offset`, resolving to a `usize` count.
+    /// Default: `Err(EINVAL)`.
+    fn read(
+        &self,
+        buffer: &mut dyn Buffer,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send {
+        async { Err(EINVAL) }
+    }
+
+    /// Direct variant of [`read`](Self::read).
+    /// NOTE(review): presumably bypasses the page cache -- confirm with implementors.
+    /// Default: `Err(EINVAL)`.
+    fn read_direct(
+        &self,
+        buffer: &mut dyn Buffer,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send {
+        async { Err(EINVAL) }
+    }
+
+    /// Writes data taken from `stream` at the position described by `offset`,
+    /// resolving to a `usize` count. Default: `Err(EINVAL)`.
+    fn write(
+        &self,
+        stream: &mut dyn Stream,
+        offset: WriteOffset<'_>,
+    ) -> impl Future<Output = KResult<usize>> + Send {
+        async { Err(EINVAL) }
+    }
+
+    /// Direct variant of [`write`](Self::write) taking a plain byte offset.
+    /// NOTE(review): presumably bypasses the page cache -- confirm with implementors.
+    /// Default: `Err(EINVAL)`.
+    fn write_direct(
+        &self,
+        stream: &mut dyn Stream,
+        offset: usize,
+    ) -> impl Future<Output = KResult<usize>> + Send {
+        async { Err(EINVAL) }
+    }
+
+    /// Device id represented by this inode. This is the only synchronous
+    /// method of the trait. Default: `Err(EINVAL)`.
+    fn devid(&self) -> KResult<DeviceId> {
+        Err(EINVAL)
+    }
+
+    /// Reads the link target into `buffer`. Default: `Err(EINVAL)`.
+    fn readlink(&self, buffer: &mut dyn Buffer) -> impl Future<Output = KResult<usize>> + Send {
+        async { Err(EINVAL) }
+    }
+
+    /// Sets the file length to `length`. Default: `Err(EPERM)`.
+    fn truncate(&self, length: usize) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Changes the permission bits to `perm`. Default: `Err(EPERM)`.
+    fn chmod(&self, perm: Permission) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+
+    /// Changes the owner to `uid` and the group to `gid`. Default: `Err(EPERM)`.
+    fn chown(&self, uid: u32, gid: u32) -> impl Future<Output = KResult<()>> + Send {
+        async { Err(EPERM) }
+    }
+}
+
+/// Type-erased directory interface of an inode, usable through `dyn Inode`.
+///
+/// Mirrors [`InodeDirOps`]; a blanket impl in this file provides it for every
+/// `InodeDirOps` implementor. The `async fn` methods go through
+/// `#[async_trait]`; `readdir` is written out by hand and returns a pinned,
+/// boxed future whose lifetime `'r` is bounded by both `self` and `callback`.
+#[async_trait]
+pub trait InodeDir {
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>>;
+    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()>;
+    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()>;
+    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()>;
+    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()>;
+    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()>;
+    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()>;
+
+    /// Reads directory entries starting at `offset`, invoking `callback` for
+    /// each entry. The outer `KResult` carries directory read errors, the
+    /// inner one errors raised by `callback`.
+    fn readdir<'r, 'a: 'r, 'b: 'r>(
+        &'a self,
+        offset: usize,
+        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> Pin<Box<dyn Future<Output = KResult<KResult<usize>>> + Send + 'r>>;
+}
+
+/// Type-erased file interface of an inode, usable through `dyn Inode`.
+///
+/// Mirrors [`InodeFileOps`]; a blanket impl in this file provides it for every
+/// `InodeFileOps` implementor. The `async fn` methods go through
+/// `#[async_trait]`; `devid` stays synchronous.
+///
+/// Unlike `InodeFileOps::chmod`, which takes a `Permission`, `chmod` here
+/// takes a full `Mode`.
+#[async_trait]
+pub trait InodeFile {
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize>;
+    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize>;
+    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize>;
+    async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize>;
+    fn devid(&self) -> KResult<DeviceId>;
+    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize>;
+    async fn truncate(&self, length: usize) -> KResult<()>;
+    async fn chmod(&self, mode: Mode) -> KResult<()>;
+    async fn chown(&self, uid: u32, gid: u32) -> KResult<()>;
+}
+
+/// Object-safe inode interface combining the file and directory operations
+/// with the generic attribute accessors.
+///
+/// This is the trait used behind `dyn Inode`; a blanket impl in this file
+/// provides it for every type implementing `InodeOps + InodeFile + InodeDir`.
+pub trait Inode: InodeFile + InodeDir + Any + Send + Sync + 'static {
+    /// Inode number of this inode.
+    fn ino(&self) -> Ino;
+    /// File format (type) of this inode.
+    fn format(&self) -> Format;
+    /// Spinlock-protected mutable attributes of this inode.
+    fn info(&self) -> &Spin<InodeInfo>;
+
+    // TODO: This should probably be removed; it is a temporary workaround for now.
+    /// Page cache of this inode, or `None` if it has none.
+    fn page_cache(&self) -> Option<&PageCache>;
+
+    /// Weak, type-erased reference to the owning superblock.
+    fn sbref(&self) -> SbRef<dyn SuperBlock>;
+    /// Strong, type-erased reference to the owning superblock; fails if the
+    /// weak reference can no longer be upgraded.
+    fn sbget(&self) -> KResult<SbUse<dyn SuperBlock>>;
+}
+
+// Blanket impl: every `InodeFileOps` implementor gets the type-erased
+// `InodeFile` interface. Each method forwards to the `InodeFileOps` method of
+// the same name and awaits it; `#[async_trait]` supplies the boxing of the
+// resulting futures.
+//
+// NOTE(review): the `self.read(..)`-style calls are expected to resolve to the
+// `InodeFileOps` methods coming from the `T: InodeFileOps` bound rather than
+// to the `InodeFile` methods being defined here; removing a method from
+// `InodeFileOps` would turn the matching forwarder into unbounded recursion.
+#[async_trait]
+impl<T> InodeFile for T
+where
+    T: InodeFileOps,
+{
+    async fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        self.read(buffer, offset).await
+    }
+
+    async fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        self.read_direct(buffer, offset).await
+    }
+
+    async fn write(&self, stream: &mut dyn Stream, offset: WriteOffset<'_>) -> KResult<usize> {
+        self.write(stream, offset).await
+    }
+
+    async fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
+        self.write_direct(stream, offset).await
+    }
+
+    fn devid(&self) -> KResult<DeviceId> {
+        self.devid()
+    }
+
+    async fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+        self.readlink(buffer).await
+    }
+
+    async fn truncate(&self, length: usize) -> KResult<()> {
+        self.truncate(length).await
+    }
+
+    async fn chmod(&self, mode: Mode) -> KResult<()> {
+        // Only the non-format bits of `mode` are passed on, as a `Permission`.
+        self.chmod(Permission::new(mode.non_format_bits())).await
+    }
+
+    async fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        self.chown(uid, gid).await
+    }
+}
+
+// Blanket impl: every `InodeDirOps` implementor gets the type-erased
+// `InodeDir` interface. Each method forwards to the `InodeDirOps` method of
+// the same name; `#[async_trait]` supplies the boxing for the `async fn`s,
+// while `readdir` boxes its future explicitly.
+//
+// NOTE(review): the `self.lookup(..)`-style calls are expected to resolve to
+// the `InodeDirOps` methods coming from the `T: InodeDirOps` bound rather than
+// to the `InodeDir` methods being defined here.
+#[async_trait]
+impl<T> InodeDir for T
+where
+    T: InodeDirOps,
+{
+    async fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<InodeUse<dyn Inode>>> {
+        self.lookup(dentry).await
+    }
+
+    async fn create(&self, at: &Arc<Dentry>, perm: Permission) -> KResult<()> {
+        self.create(at, perm).await
+    }
+
+    async fn mkdir(&self, at: &Dentry, perm: Permission) -> KResult<()> {
+        self.mkdir(at, perm).await
+    }
+
+    async fn mknod(&self, at: &Dentry, mode: Mode, dev: DeviceId) -> KResult<()> {
+        self.mknod(at, mode, dev).await
+    }
+
+    async fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        self.unlink(at).await
+    }
+
+    async fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        self.symlink(at, target).await
+    }
+
+    async fn rename(&self, rename_data: RenameData<'_, '_>) -> KResult<()> {
+        self.rename(rename_data).await
+    }
+
+    fn readdir<'r, 'a: 'r, 'b: 'r>(
+        &'a self,
+        offset: usize,
+        callback: &'b mut (dyn FnMut(&[u8], Ino) -> KResult<bool> + Send),
+    ) -> Pin<Box<dyn Future<Output = KResult<KResult<usize>>> + Send + 'r>> {
+        // Box and pin the concrete future so it can be returned as a trait object.
+        Box::pin(self.readdir(offset, callback))
+    }
+}
+
+// Blanket impl: any type providing `InodeOps` plus the type-erased file and
+// directory interfaces is an `Inode`. The accessors forward to `InodeOps`;
+// the superblock accessors erase the concrete superblock type.
+//
+// NOTE(review): `self.ino()` and friends are expected to resolve to the
+// `InodeOps` methods coming from the `T: InodeOps` bound rather than to the
+// `Inode` methods being defined here.
+impl<T> Inode for T
+where
+    T: InodeOps + InodeFile + InodeDir,
+{
+    fn ino(&self) -> Ino {
+        self.ino()
+    }
+
+    fn format(&self) -> Format {
+        self.format()
+    }
+
+    fn info(&self) -> &Spin<InodeInfo> {
+        self.info()
+    }
+
+    fn page_cache(&self) -> Option<&PageCache> {
+        self.page_cache()
+    }
+
+    fn sbref(&self) -> SbRef<dyn SuperBlock> {
+        // Clone the typed weak reference; the return type turns it into
+        // `SbRef<dyn SuperBlock>`.
+        self.super_block().clone()
+    }
+
+    fn sbget(&self) -> KResult<SbUse<dyn SuperBlock>> {
+        // Upgrade the weak reference, then cast the strong handle to the
+        // type-erased `SbUse<dyn SuperBlock>`.
+        self.super_block().get().map(|sb| sb as _)
+    }
+}
+
+/// Mutable inode attributes, handed out behind a `Spin` lock by
+/// `InodeOps::info` / `Inode::info`.
+#[derive(Debug, Clone)]
+pub struct InodeInfo {
+    /// File size (reported as `stx_size` by `statx`; presumably in bytes).
+    pub size: u64,
+    /// Link count (reported as `stx_nlink` by `statx`).
+    pub nlink: u64,
+
+    /// Owner user id.
+    pub uid: u32,
+    /// Owner group id.
+    pub gid: u32,
+    /// Permission bits; the file format is not stored here but obtained from
+    /// `format()`.
+    pub perm: Permission,
+
+    /// Access time (reported as `stx_atime` by `statx`).
+    pub atime: Instant,
+    /// Change time (reported as `stx_ctime` by `statx`).
+    pub ctime: Instant,
+    /// Modification time (reported as `stx_mtime` by `statx`).
+    pub mtime: Instant,
+}
+
+/// Weak handle to an inode: a thin wrapper around `Weak<I>`.
+///
+/// `Clone` is implemented by hand below instead of being derived: a derived
+/// impl would add an `I: Clone` bound, which would make `InodeRef<dyn Inode>`
+/// (and any inode type that is not itself `Clone`) impossible to clone.
+pub struct InodeRef<I>(Weak<I>)
+where
+    I: Inode + ?Sized;
+
+impl<I> Clone for InodeRef<I>
+where
+    I: Inode + ?Sized,
+{
+    /// Clones the weak handle; the inode itself is not cloned.
+    fn clone(&self) -> Self {
+        Self(self.0.clone())
+    }
+}
+
+/// Strong handle to an inode: a thin wrapper around `Arc<I>`.
+///
+/// Being a local type, it can carry inherent impls and trait impls that could
+/// not be written directly for `Arc<dyn Inode>`.
+pub struct InodeUse<I>(Arc<I>)
+where
+    I: Inode + ?Sized;
+
+/// Constructors, available only for sized inode types.
+impl<I> InodeUse<I>
+where
+    I: Inode,
+{
+    /// Moves `inode` into a new `Arc` and wraps it.
+    pub fn new(inode: I) -> Self {
+        Self(Arc::new(inode))
+    }
+
+    /// Builds the inode through `Arc::new_cyclic`: `inode_func` receives a
+    /// `Weak` pointing at the allocation under construction, so the inode can
+    /// store a weak reference to itself.
+    pub fn new_cyclic(inode_func: impl FnOnce(&Weak<I>) -> I) -> Self {
+        Self(Arc::new_cyclic(inode_func))
+    }
+}
+
+impl<I> InodeUse<I>
+where
+    I: Inode + ?Sized,
+{
+    /// Returns the raw pointer to the inode held by the inner `Arc`, without
+    /// affecting the reference count.
+    pub fn as_raw(&self) -> *const I {
+        Arc::as_ptr(&self.0)
+    }
+}
+
+// Allows an `InodeUse<T>` to be unsize-coerced to `InodeUse<U>` whenever `T`
+// unsizes to `U`, e.g. from a concrete inode type to `InodeUse<dyn Inode>`.
+// `CoerceUnsized` and `Unsize` are unstable, so this needs a nightly toolchain.
+impl<T, U> CoerceUnsized<InodeUse<U>> for InodeUse<T>
+where
+    T: Inode + Unsize<U> + ?Sized,
+    U: Inode + ?Sized,
+{
+}
+
+// Written by hand so that no `I: Clone` bound is required; this keeps
+// `InodeUse<dyn Inode>` cloneable.
+impl<I> Clone for InodeUse<I>
+where
+    I: Inode + ?Sized,
+{
+    /// Clones the inner `Arc`; the inode itself is not cloned.
+    fn clone(&self) -> Self {
+        Self(self.0.clone())
+    }
+}
+
+/// Formats as `InodeUse(ino=<ino>)`, where `<ino>` is the `Display` output of
+/// the inode number; no other inode state is printed.
+impl<I> core::fmt::Debug for InodeUse<I>
+where
+    I: Inode + ?Sized,
+{
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "InodeUse(ino={})", self.ino())
+    }
+}
+
+/// Dereferences to the wrapped inode, so inode methods can be called directly
+/// on an `InodeUse`.
+impl<I> Deref for InodeUse<I>
+where
+    I: Inode + ?Sized,
+{
+    type Target = I;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.deref()
+    }
+}

+ 10 - 0
src/kernel/vfs/inode/mod.rs

@@ -0,0 +1,10 @@
+mod ino;
+mod inode;
+mod ops;
+mod statx;
+
+pub use ino::Ino;
+pub use inode::{
+    Inode, InodeDir, InodeDirOps, InodeFile, InodeFileOps, InodeInfo, InodeOps, InodeRef, InodeUse,
+};
+pub use ops::{RenameData, WriteOffset};

+ 18 - 0
src/kernel/vfs/inode/ops.rs

@@ -0,0 +1,18 @@
+use alloc::sync::Arc;
+
+use crate::kernel::vfs::dentry::Dentry;
+
+use super::{inode::InodeUse, Inode};
+
+/// Where a write should take place.
+pub enum WriteOffset<'end> {
+    /// Write at the given absolute offset.
+    Position(usize),
+    /// NOTE(review): presumably "write at the end of the file", with the
+    /// borrowed `usize` used to hand an offset back to the caller -- confirm
+    /// against the implementors of `write`.
+    End(&'end mut usize),
+}
+
+/// Arguments of a rename request passed to `InodeDirOps::rename` /
+/// `InodeDir::rename`.
+pub struct RenameData<'a, 'b> {
+    /// Dentry being renamed.
+    pub old_dentry: &'a Arc<Dentry>,
+    /// Dentry describing the destination name.
+    pub new_dentry: &'b Arc<Dentry>,
+    /// Inode of the directory that will contain `new_dentry`.
+    pub new_parent: InodeUse<dyn Inode>,
+    /// NOTE(review): presumably requests an exchange of the two entries
+    /// (`RENAME_EXCHANGE`-style) -- confirm against implementors.
+    pub is_exchange: bool,
+    /// NOTE(review): presumably forbids replacing an existing destination
+    /// (`RENAME_NOREPLACE`-style) -- confirm against implementors.
+    pub no_replace: bool,
+}

+ 97 - 0
src/kernel/vfs/inode/statx.rs

@@ -0,0 +1,97 @@
+use posix_types::stat::StatX;
+
+use crate::{
+    kernel::{
+        constants::{
+            STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME,
+            STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID,
+        },
+        vfs::types::Format,
+    },
+    prelude::KResult,
+};
+
+use super::{inode::InodeUse, Inode};
+
+impl<I> InodeUse<I>
+where
+    I: Inode + ?Sized,
+{
+    /// Fills `stat` with the attributes selected by `mask`.
+    ///
+    /// For every `STATX_*` bit set in `mask`, the matching field of `stat` is
+    /// written and the same bit is OR-ed into `stat.stx_mask`; `stx_mask` is
+    /// never cleared here, so the caller should pass it in zeroed.
+    /// `stx_mode` is always reset before the permission and type bits are
+    /// merged in. The filesystem device numbers and `stx_attributes_mask` are
+    /// written regardless of `mask`.
+    ///
+    /// # Errors
+    /// - The error from `sbget()` when the superblock is no longer alive.
+    /// - The error from `devid()` when `STATX_TYPE` is requested for a block
+    ///   or character device; `stat` may be partially filled in that case.
+    pub fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
+        let sb = self.sbget()?;
+        // The spinlock guard is held for the rest of the function so that all
+        // reported attributes come from one consistent snapshot.
+        let info = self.info().lock();
+
+        if mask & STATX_NLINK != 0 {
+            stat.stx_nlink = info.nlink as _;
+            stat.stx_mask |= STATX_NLINK;
+        }
+
+        if mask & STATX_ATIME != 0 {
+            stat.stx_atime = info.atime.into();
+            stat.stx_mask |= STATX_ATIME;
+        }
+
+        if mask & STATX_MTIME != 0 {
+            stat.stx_mtime = info.mtime.into();
+            stat.stx_mask |= STATX_MTIME;
+        }
+
+        if mask & STATX_CTIME != 0 {
+            stat.stx_ctime = info.ctime.into();
+            stat.stx_mask |= STATX_CTIME;
+        }
+
+        if mask & STATX_SIZE != 0 {
+            stat.stx_size = info.size as _;
+            stat.stx_mask |= STATX_SIZE;
+        }
+
+        // `stx_mode` is assembled from two independent parts: the permission
+        // bits (STATX_MODE) and the file format bits (STATX_TYPE).
+        stat.stx_mode = 0;
+        if mask & STATX_MODE != 0 {
+            stat.stx_mode |= info.perm.bits() as u16;
+            stat.stx_mask |= STATX_MODE;
+        }
+
+        if mask & STATX_TYPE != 0 {
+            let format = self.format();
+
+            stat.stx_mode |= format.as_raw() as u16;
+            // Only device nodes carry an rdev.
+            if let Format::BLK | Format::CHR = format {
+                let devid = self.devid()?;
+                stat.stx_rdev_major = devid.major as _;
+                stat.stx_rdev_minor = devid.minor as _;
+            }
+            stat.stx_mask |= STATX_TYPE;
+        }
+
+        if mask & STATX_INO != 0 {
+            stat.stx_ino = self.ino().as_raw();
+            stat.stx_mask |= STATX_INO;
+        }
+
+        if mask & STATX_BLOCKS != 0 {
+            // Number of 512-byte units, rounded up. `div_ceil` cannot overflow,
+            // unlike `(size + 511) / 512` for sizes close to `u64::MAX`.
+            stat.stx_blocks = info.size.div_ceil(512);
+            stat.stx_blksize = sb.info.io_blksize as _;
+            stat.stx_mask |= STATX_BLOCKS;
+        }
+
+        if mask & STATX_UID != 0 {
+            stat.stx_uid = info.uid;
+            stat.stx_mask |= STATX_UID;
+        }
+
+        if mask & STATX_GID != 0 {
+            stat.stx_gid = info.gid;
+            stat.stx_mask |= STATX_GID;
+        }
+
+        // Device of the containing filesystem; reported unconditionally.
+        let fsdev = sb.info.device_id;
+        stat.stx_dev_major = fsdev.major as _;
+        stat.stx_dev_minor = fsdev.minor as _;
+
+        // TODO: support more attributes
+        stat.stx_attributes_mask = 0;
+
+        Ok(())
+    }
+}

+ 11 - 11
src/kernel/vfs/mod.rs

@@ -1,31 +1,31 @@
-use crate::prelude::*;
-use alloc::sync::Arc;
-use dentry::Dentry;
-use eonix_sync::LazyLock;
-use inode::Mode;
-
 pub mod dentry;
 mod file;
 pub mod filearray;
 pub mod inode;
 pub mod mount;
-pub mod vfs;
+mod superblock;
+pub mod types;
 
-pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile};
+use crate::prelude::*;
+use alloc::sync::Arc;
+use dentry::Dentry;
+use eonix_sync::LazyLock;
+use types::Permission;
 
-pub type DevId = u32;
+pub use file::{File, FileType, PollEvent, SeekOption, TerminalFile};
+pub use superblock::{SbRef, SbUse, SuperBlock, SuperBlockInfo, SuperBlockLock};
 
 pub struct FsContext {
     pub fsroot: Arc<Dentry>,
     pub cwd: Spin<Arc<Dentry>>,
-    pub umask: Spin<Mode>,
+    pub umask: Spin<Permission>,
 }
 
 static GLOBAL_FS_CONTEXT: LazyLock<Arc<FsContext>> = LazyLock::new(|| {
     Arc::new(FsContext {
         fsroot: Dentry::root().clone(),
         cwd: Spin::new(Dentry::root().clone()),
-        umask: Spin::new(Mode::new(0o022)),
+        umask: Spin::new(Permission::new(0o755)),
     })
 });
 

+ 20 - 12
src/kernel/vfs/mount.rs

@@ -1,11 +1,15 @@
 use super::{
     dentry::{dcache, Dentry, DROOT},
-    inode::Inode,
-    vfs::Vfs,
+    inode::{Inode, InodeUse},
+    SbUse, SuperBlock,
+};
+use crate::kernel::{
+    constants::{EEXIST, ENODEV, ENOTDIR},
+    task::block_on,
 };
-use crate::kernel::constants::{EEXIST, ENODEV, ENOTDIR};
 use crate::prelude::*;
 use alloc::{collections::btree_map::BTreeMap, string::ToString as _, sync::Arc};
+use async_trait::async_trait;
 use eonix_sync::LazyLock;
 
 pub const MS_RDONLY: u64 = 1 << 0;
@@ -30,17 +34,21 @@ static MOUNT_CREATORS: Spin<BTreeMap<String, Arc<dyn MountCreator>>> = Spin::new
 static MOUNTS: Spin<Vec<(Arc<Dentry>, MountPointData)>> = Spin::new(vec![]);
 
 pub struct Mount {
-    _vfs: Arc<dyn Vfs>,
+    sb: SbUse<dyn SuperBlock>,
     root: Arc<Dentry>,
 }
 
 impl Mount {
-    pub fn new(mp: &Dentry, vfs: Arc<dyn Vfs>, root_inode: Arc<dyn Inode>) -> KResult<Self> {
+    pub fn new(
+        mp: &Dentry,
+        sb: SbUse<dyn SuperBlock>,
+        root_inode: InodeUse<dyn Inode>,
+    ) -> KResult<Self> {
         let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name());
-        root_dentry.save_dir(root_inode)?;
+        root_dentry.fill(root_inode);
 
         Ok(Self {
-            _vfs: vfs,
+            sb,
             root: root_dentry,
         })
     }
@@ -53,9 +61,10 @@ impl Mount {
 unsafe impl Send for Mount {}
 unsafe impl Sync for Mount {}
 
+#[async_trait]
 pub trait MountCreator: Send + Sync {
     fn check_signature(&self, first_block: &[u8]) -> KResult<bool>;
-    fn create_mount(&self, source: &str, flags: u64, mp: &Arc<Dentry>) -> KResult<Mount>;
+    async fn create_mount(&self, source: &str, flags: u64, mp: &Arc<Dentry>) -> KResult<Mount>;
 }
 
 pub fn register_filesystem(fstype: &str, creator: Arc<dyn MountCreator>) -> KResult<()> {
@@ -77,7 +86,7 @@ struct MountPointData {
     flags: u64,
 }
 
-pub fn do_mount(
+pub async fn do_mount(
     mountpoint: &Arc<Dentry>,
     source: &str,
     mountpoint_str: &str,
@@ -101,7 +110,7 @@ pub fn do_mount(
         let creators = { MOUNT_CREATORS.lock() };
         creators.get(fstype).ok_or(ENODEV)?.clone()
     };
-    let mount = creator.create_mount(source, flags, mountpoint)?;
+    let mount = creator.create_mount(source, flags, mountpoint).await?;
 
     let root_dentry = mount.root().clone();
 
@@ -165,8 +174,7 @@ impl Dentry {
                 .cloned()
                 .expect("tmpfs not registered.");
 
-            let mount = creator
-                .create_mount(&source, mount_flags, &DROOT)
+            let mount = block_on(creator.create_mount(&source, mount_flags, &DROOT))
                 .expect("Failed to create root mount.");
 
             let root_dentry = mount.root().clone();

+ 127 - 0
src/kernel/vfs/superblock.rs

@@ -0,0 +1,127 @@
+use core::{
+    marker::Unsize,
+    ops::{CoerceUnsized, Deref},
+};
+
+use alloc::sync::{Arc, Weak};
+use eonix_sync::RwLock;
+
+use crate::{kernel::constants::EIO, prelude::KResult};
+
+use super::types::DeviceId;
+
+/// Marker trait for the filesystem-specific part of a superblock.
+///
+/// It declares no methods; implementors only have to be `Send + Sync + 'static`.
+pub trait SuperBlock: Send + Sync + 'static {}
+
+/// Attributes that are stored next to every superblock backend.
+#[derive(Debug, Clone)]
+pub struct SuperBlockInfo {
+    /// I/O block size. NOTE(review): presumably in bytes — confirm against the users.
+    pub io_blksize: u32,
+    /// Identifier of the device associated with the superblock.
+    pub device_id: DeviceId,
+    /// Whether the superblock is read only.
+    pub read_only: bool,
+}
+
+/// Zero-sized token type. Its only field is a private `()`, so values can
+/// only be constructed from inside this module.
+pub struct SuperBlockLock(());
+
+/// A superblock backend bundled with its [`SuperBlockInfo`] and a lock.
+///
+/// `Backend` may be unsized, so a `SuperBlockComplex<dyn SuperBlock>` is a
+/// valid type.
+pub struct SuperBlockComplex<Backend>
+where
+    Backend: SuperBlock + ?Sized,
+{
+    /// Common superblock attributes.
+    pub info: SuperBlockInfo,
+    /// Reader-writer lock wrapping a [`SuperBlockLock`] token.
+    // NOTE(review): nothing in this file acquires this lock; how it is meant
+    // to be used still has to be confirmed.
+    pub rwsem: RwLock<SuperBlockLock>,
+    /// The filesystem-specific part. Kept as the last field because only the
+    /// last field of a struct may be unsized.
+    pub backend: Backend,
+}
+
+/// Weak handle to a [`SuperBlockComplex`]; a thin wrapper around `Weak`.
+///
+/// Holding an `SbRef` does not keep the superblock alive.
+pub struct SbRef<S>(Weak<SuperBlockComplex<S>>)
+where
+    S: SuperBlock + ?Sized;
+
+/// Strong handle to a [`SuperBlockComplex`]; a thin wrapper around `Arc`.
+///
+/// The superblock stays alive for as long as at least one `SbUse` exists.
+pub struct SbUse<S>(Arc<SuperBlockComplex<S>>)
+where
+    S: SuperBlock + ?Sized;
+
+impl<S> SbRef<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    /// Tries to upgrade this weak handle into a strong one.
+    ///
+    /// Returns `None` when the superblock has already been dropped.
+    pub fn try_get(&self) -> Option<SbUse<S>> {
+        let strong = self.0.upgrade()?;
+        Some(SbUse(strong))
+    }
+
+    /// Like [`Self::try_get`], but reports a dropped superblock as `Err(EIO)`.
+    pub fn get(&self) -> KResult<SbUse<S>> {
+        match self.try_get() {
+            Some(sb) => Ok(sb),
+            None => Err(EIO),
+        }
+    }
+
+    /// Creates a weak handle that points at the same superblock as `sb`.
+    pub fn from(sb: &SbUse<S>) -> Self {
+        let weak = Arc::downgrade(&sb.0);
+        Self(weak)
+    }
+
+    /// Returns `true` when both handles point at the same address.
+    ///
+    /// Only addresses are compared (pointer metadata is ignored), so handles
+    /// with different backend types `S` and `U` can be compared.
+    pub fn eq<U>(&self, other: &SbRef<U>) -> bool
+    where
+        U: SuperBlock + ?Sized,
+    {
+        let lhs = self.0.as_ptr();
+        let rhs = other.0.as_ptr();
+        core::ptr::addr_eq(lhs, rhs)
+    }
+}
+
+impl<S> SbUse<S>
+where
+    S: SuperBlock,
+{
+    /// Allocates a new superblock from `info` and an already built `backend`.
+    pub fn new(info: SuperBlockInfo, backend: S) -> Self {
+        let complex = SuperBlockComplex {
+            info,
+            rwsem: RwLock::new(SuperBlockLock(())),
+            backend,
+        };
+
+        Self(Arc::new(complex))
+    }
+
+    /// Allocates a new superblock whose backend needs a handle to itself.
+    ///
+    /// `backend_func` receives an [`SbRef`] pointing at the superblock that is
+    /// being constructed and returns the backend to store in it. The
+    /// allocation is made with `Arc::new_cyclic`, so that handle cannot be
+    /// upgraded until construction has finished.
+    pub fn new_cyclic(info: SuperBlockInfo, backend_func: impl FnOnce(SbRef<S>) -> S) -> Self {
+        Self(Arc::new_cyclic(|weak| {
+            let self_ref = SbRef(Weak::clone(weak));
+
+            SuperBlockComplex {
+                info,
+                rwsem: RwLock::new(SuperBlockLock(())),
+                backend: backend_func(self_ref),
+            }
+        }))
+    }
+}
+
+// Written by hand rather than derived: a derived impl would add an
+// `S: Clone` bound, which cloning the inner `Weak` does not need.
+impl<S> Clone for SbRef<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    /// Clones the inner `Weak`; the superblock itself is not copied.
+    fn clone(&self) -> Self {
+        Self(Weak::clone(&self.0))
+    }
+}
+
+// Written by hand rather than derived: a derived impl would add an
+// `S: Clone` bound, which cloning the inner `Arc` does not need.
+impl<S> Clone for SbUse<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    /// Clones the inner `Arc`; the superblock itself is not copied.
+    fn clone(&self) -> Self {
+        Self(Arc::clone(&self.0))
+    }
+}
+
+// Lets an `SbRef<T>` coerce to an `SbRef<U>` whenever `T` unsizes to `U`,
+// for example from a concrete backend type to a trait object.
+impl<T, U> CoerceUnsized<SbRef<U>> for SbRef<T>
+where
+    T: SuperBlock + Unsize<U> + ?Sized,
+    U: SuperBlock + ?Sized,
+{
+}
+
+// Same unsizing coercion for the strong handle, `SbUse<T>` to `SbUse<U>`.
+impl<T, U> CoerceUnsized<SbUse<U>> for SbUse<T>
+where
+    T: SuperBlock + Unsize<U> + ?Sized,
+    U: SuperBlock + ?Sized,
+{
+}
+
+// A strong handle dereferences to the `SuperBlockComplex` it keeps alive, so
+// `info`, `rwsem` and `backend` are reachable directly through an `SbUse`.
+impl<S> Deref for SbUse<S>
+where
+    S: SuperBlock + ?Sized,
+{
+    type Target = SuperBlockComplex<S>;
+
+    fn deref(&self) -> &Self::Target {
+        &*self.0
+    }
+}

+ 36 - 0
src/kernel/vfs/types/device_id.rs

@@ -0,0 +1,36 @@
+use core::fmt::{Debug, Display, Formatter};
+
+/// A device identifier made of a 16-bit major and a 16-bit minor number.
+///
+/// The derived ordering compares `major` first, then `minor`.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct DeviceId {
+    /// Major number; the upper 16 bits of the raw `u32` form.
+    pub major: u16,
+    /// Minor number; the lower 16 bits of the raw `u32` form.
+    pub minor: u16,
+}
+
+impl DeviceId {
+    /// Builds a device id from its two halves.
+    pub const fn new(major: u16, minor: u16) -> Self {
+        DeviceId { major, minor }
+    }
+
+    /// Splits a raw 32-bit id: the upper 16 bits become the major number and
+    /// the lower 16 bits the minor number.
+    pub const fn from_raw(raw: u32) -> Self {
+        let major = (raw >> 16) as u16;
+        // Casting to `u16` keeps exactly the low 16 bits.
+        let minor = raw as u16;
+
+        Self::new(major, minor)
+    }
+
+    /// Packs the id back into the 32-bit form accepted by [`Self::from_raw`].
+    pub const fn to_raw(self) -> u32 {
+        let high = (self.major as u32) << 16;
+        let low = self.minor as u32;
+
+        high | low
+    }
+}
+
+impl Debug for DeviceId {
+    /// Writes `DeviceId(MMMM:mmmm)`, both numbers as four-digit lowercase hex.
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        let Self { major, minor } = *self;
+
+        f.write_fmt(format_args!("DeviceId({:04x}:{:04x})", major, minor))
+    }
+}
+
+impl Display for DeviceId {
+    /// Writes `MMMM:mmmm`, both numbers as four-digit lowercase hex.
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        let Self { major, minor } = *self;
+
+        f.write_fmt(format_args!("{:04x}:{:04x}", major, minor))
+    }
+}

+ 5 - 0
src/kernel/vfs/types/mod.rs

@@ -0,0 +1,5 @@
+mod device_id;
+mod mode;
+
+pub use device_id::DeviceId;
+pub use mode::{Format, Mode, Permission};

+ 169 - 0
src/kernel/vfs/types/mode.rs

@@ -0,0 +1,169 @@
+use crate::kernel::{
+    constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG},
+    syscall::{FromSyscallArg, SyscallRetVal},
+};
+
+/// Raw mode bits: the file type bits selected by `S_IFMT` together with all
+/// remaining (non-format) bits.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Mode(u32);
+
+/// The file types that [`Mode::format`] is able to decode.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum Format {
+    /// Corresponds to `S_IFREG`.
+    REG,
+    /// Corresponds to `S_IFDIR`.
+    DIR,
+    /// Corresponds to `S_IFLNK`.
+    LNK,
+    /// Corresponds to `S_IFBLK`.
+    BLK,
+    /// Corresponds to `S_IFCHR`.
+    CHR,
+}
+
+/// Permission bits; construction through `Permission::new` keeps only the
+/// bits inside `0o7777`.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Permission(u32);
+
+impl Mode {
+    /// Wraps raw mode bits as they are; nothing is validated or masked.
+    pub const fn new(bits: u32) -> Self {
+        Mode(bits)
+    }
+
+    /// Whether the file type bits equal `S_IFBLK`.
+    pub const fn is_blk(&self) -> bool {
+        self.format_bits() == S_IFBLK
+    }
+
+    /// Whether the file type bits equal `S_IFCHR`.
+    pub const fn is_chr(&self) -> bool {
+        self.format_bits() == S_IFCHR
+    }
+
+    /// All bits, exactly as they were passed to [`Mode::new`].
+    pub const fn bits(&self) -> u32 {
+        let Mode(raw) = *self;
+        raw
+    }
+
+    /// Only the file type bits (those selected by `S_IFMT`).
+    pub const fn format_bits(&self) -> u32 {
+        let Mode(raw) = *self;
+        raw & S_IFMT
+    }
+
+    /// Everything except the file type bits.
+    pub const fn non_format_bits(&self) -> u32 {
+        let Mode(raw) = *self;
+        raw & !S_IFMT
+    }
+
+    /// Decodes the file type bits.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the file type bits are not one of `S_IFREG`, `S_IFDIR`,
+    /// `S_IFLNK`, `S_IFBLK` or `S_IFCHR`. That includes a mode whose type bits
+    /// are all zero.
+    pub fn format(&self) -> Format {
+        let kind = self.format_bits();
+
+        if kind == S_IFREG {
+            Format::REG
+        } else if kind == S_IFDIR {
+            Format::DIR
+        } else if kind == S_IFLNK {
+            Format::LNK
+        } else if kind == S_IFBLK {
+            Format::BLK
+        } else if kind == S_IFCHR {
+            Format::CHR
+        } else {
+            panic!("unknown format bits: {:#o}", kind)
+        }
+    }
+
+    /// The non-format bits as a [`Permission`], which masks them to `0o7777`.
+    pub fn perm(&self) -> Permission {
+        let rest = self.non_format_bits();
+        Permission::new(rest)
+    }
+
+    /// A copy of this mode with the file type bits cleared.
+    pub const fn non_format(&self) -> Self {
+        Mode(self.non_format_bits())
+    }
+
+    /// Replaces every non-format bit with `perm`, keeping the file type bits.
+    pub const fn set_perm(&mut self, perm: Permission) {
+        let kind = self.format_bits();
+        self.0 = kind | perm.bits();
+    }
+}
+
+impl Format {
+    /// Maps the variant back to its `S_IF*` file type constant.
+    pub const fn as_raw(&self) -> u32 {
+        match *self {
+            Format::REG => S_IFREG,
+            Format::DIR => S_IFDIR,
+            Format::LNK => S_IFLNK,
+            Format::BLK => S_IFBLK,
+            Format::CHR => S_IFCHR,
+        }
+    }
+}
+
+impl Permission {
+    /// Text for each three-bit permission value, indexed by that value.
+    const RWX: [&str; 8] = ["---", "--x", "-w-", "-wx", "r--", "r-x", "rw-", "rwx"];
+
+    /// Builds a permission set; bits outside `0o7777` are dropped.
+    pub const fn new(perm_bits: u32) -> Self {
+        let kept = perm_bits & 0o7777;
+        Permission(kept)
+    }
+
+    /// The stored permission bits.
+    pub const fn bits(&self) -> u32 {
+        let Permission(raw) = *self;
+        raw
+    }
+
+    /// Keeps only the bits that are also set in `mask`.
+    pub const fn mask_with(&self, mask: Self) -> Self {
+        Permission(self.bits() & mask.bits())
+    }
+}
+
+impl core::fmt::Debug for Mode {
+    /// Writes `Mode(<format>, <perm>)`, where `<perm>` is the low nine
+    /// permission bits in octal. When any other non-format bit is set, those
+    /// bits are appended as `, rem=<hex>`.
+    ///
+    /// This never panics. A `Mode` can hold arbitrary bits (it is built
+    /// straight from syscall arguments), so file type bits that do not match a
+    /// known [`Format`] are printed as `UNKNOWN(<octal>)`.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let format_bits = self.format_bits();
+        let perm = self.non_format_bits() & 0o777;
+        let rem = self.non_format_bits() & !0o777;
+
+        // Decode the type bits here instead of calling `self.format()`, which
+        // panics on anything but the five known formats.
+        let format = match format_bits {
+            S_IFREG => Some(Format::REG),
+            S_IFDIR => Some(Format::DIR),
+            S_IFLNK => Some(Format::LNK),
+            S_IFBLK => Some(Format::BLK),
+            S_IFCHR => Some(Format::CHR),
+            _ => None,
+        };
+
+        match format {
+            Some(format) => write!(f, "Mode({format:?}, {perm:#o}")?,
+            None => write!(f, "Mode(UNKNOWN({format_bits:#o}), {perm:#o}")?,
+        }
+
+        if rem != 0 {
+            write!(f, ", rem={rem:#x}")?;
+        }
+
+        f.write_str(")")
+    }
+}
+
+impl core::fmt::Debug for Format {
+    /// Writes the bare variant name, e.g. `REG`.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let name = match *self {
+            Format::REG => "REG",
+            Format::DIR => "DIR",
+            Format::LNK => "LNK",
+            Format::BLK => "BLK",
+            Format::CHR => "CHR",
+        };
+
+        f.write_str(name)
+    }
+}
+
+impl core::fmt::Debug for Permission {
+    /// Writes the owner, group and other triads in `rwx` notation, e.g.
+    /// `rwxr-xr-x`. Bits above the low nine are not shown.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        // Each triad is three bits wide, so the index is always within 0..8.
+        let triad = |shift: u32| Self::RWX[(self.0 >> shift & 0o7) as usize];
+
+        let owner = triad(6);
+        let group = triad(3);
+        let other = triad(0);
+
+        write!(f, "{}{}{}", owner, group, other)
+    }
+}
+
+impl FromSyscallArg for Mode {
+    /// Casts the argument to `u32` (dropping any higher bits) and wraps it
+    /// without validation.
+    fn from_arg(value: usize) -> Self {
+        let bits = value as u32;
+        Self::new(bits)
+    }
+}
+
+impl SyscallRetVal for Mode {
+    /// Returns all mode bits; the result is always `Some`.
+    fn into_retval(self) -> Option<usize> {
+        let raw = self.bits();
+        Some(raw as usize)
+    }
+}
+
+impl FromSyscallArg for Permission {
+    /// Casts the argument to `u32`; `Permission::new` then keeps only the
+    /// bits inside `0o7777`.
+    fn from_arg(value: usize) -> Self {
+        let bits = value as u32;
+        Self::new(bits)
+    }
+}
+
+impl SyscallRetVal for Permission {
+    /// Returns the permission bits; the result is always `Some`.
+    fn into_retval(self) -> Option<usize> {
+        let raw = self.bits();
+        Some(raw as usize)
+    }
+}

+ 0 - 10
src/kernel/vfs/vfs.rs

@@ -1,10 +0,0 @@
-use crate::prelude::*;
-
-use super::DevId;
-
-#[allow(dead_code)]
-pub trait Vfs: Send + Sync + AsAny {
-    fn io_blksize(&self) -> usize;
-    fn fs_devid(&self) -> DevId;
-    fn is_read_only(&self) -> bool;
-}

+ 21 - 12
src/lib.rs

@@ -2,10 +2,12 @@
 #![no_main]
 #![feature(allocator_api)]
 #![feature(c_size_t)]
+#![feature(coerce_unsized)]
 #![feature(concat_idents)]
 #![feature(arbitrary_self_types)]
 #![feature(get_mut_unchecked)]
 #![feature(macro_metavar_expr)]
+#![feature(unsize)]
 
 extern crate alloc;
 
@@ -46,8 +48,8 @@ use kernel::{
     task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder},
     vfs::{
         dentry::Dentry,
-        inode::Mode,
         mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY},
+        types::Permission,
         FsContext,
     },
     CharDevice,
@@ -192,16 +194,16 @@ async fn init_process(early_kstack: PRange) {
     {
         // We might want the serial initialized as soon as possible.
         driver::serial::init().unwrap();
-        driver::e1000e::register_e1000e_driver();
-        driver::ahci::register_ahci_driver();
+        driver::e1000e::register_e1000e_driver().await;
+        driver::ahci::register_ahci_driver().await;
     }
 
     #[cfg(target_arch = "riscv64")]
     {
         driver::serial::init().unwrap();
         driver::virtio::init_virtio_devices();
-        driver::e1000e::register_e1000e_driver();
-        driver::ahci::register_ahci_driver();
+        driver::e1000e::register_e1000e_driver().await;
+        driver::ahci::register_ahci_driver().await;
         driver::goldfish_rtc::probe();
     }
 
@@ -209,21 +211,26 @@ async fn init_process(early_kstack: PRange) {
     {
         driver::serial::init().unwrap();
         driver::virtio::init_virtio_devices();
-        driver::e1000e::register_e1000e_driver();
-        driver::ahci::register_ahci_driver();
+        driver::e1000e::register_e1000e_driver().await;
+        driver::ahci::register_ahci_driver().await;
     }
 
     fs::tmpfs::init();
-    fs::procfs::init();
+    fs::procfs::init().await;
     fs::fat32::init();
-    fs::ext4::init();
+    // fs::ext4::init();
 
     let load_info = {
         // mount fat32 /mnt directory
         let fs_context = FsContext::global();
-        let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
+        let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true)
+            .await
+            .unwrap();
 
-        mnt_dir.mkdir(Mode::new(0o755)).unwrap();
+        mnt_dir
+            .mkdir(Permission::new(0o755))
+            .await
+            .expect("Failed to create /mnt directory");
 
         do_mount(
             &mnt_dir,
@@ -232,6 +239,7 @@ async fn init_process(early_kstack: PRange) {
             "fat32",
             MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID,
         )
+        .await
         .unwrap();
 
         let init_names = [&b"/init"[..], &b"/sbin/init"[..], &b"/mnt/initsh"[..]];
@@ -239,7 +247,7 @@ async fn init_process(early_kstack: PRange) {
         let mut init_name = None;
         let mut init = None;
         for name in init_names {
-            if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true) {
+            if let Ok(dentry) = Dentry::open(fs_context, Path::new(name).unwrap(), true).await {
                 if dentry.is_valid() {
                     init_name = Some(CString::new(name).unwrap());
                     init = Some(dentry);
@@ -261,6 +269,7 @@ async fn init_process(early_kstack: PRange) {
         ];
 
         ProgramLoader::parse(fs_context, init_name, init.clone(), argv, envp)
+            .await
             .expect("Failed to parse init program")
             .load()
             .await

+ 1 - 29
src/prelude.rs

@@ -18,34 +18,6 @@ pub(crate) use crate::kernel::console::{
 
 pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec};
 
-pub(crate) use core::{any::Any, fmt::Write, marker::PhantomData, str};
+pub(crate) use core::{fmt::Write, marker::PhantomData, str};
 
 pub use crate::sync::Spin;
-
-#[allow(dead_code)]
-pub trait AsAny: Send + Sync {
-    fn as_any(&self) -> &dyn Any;
-    fn as_any_mut(&mut self) -> &mut dyn Any;
-}
-
-macro_rules! impl_any {
-    ($t:ty) => {
-        impl AsAny for $t {
-            fn as_any(&self) -> &dyn Any {
-                self
-            }
-
-            fn as_any_mut(&mut self) -> &mut dyn Any {
-                self
-            }
-        }
-    };
-}
-
-macro_rules! addr_of_mut_field {
-    ($pointer:expr, $field:ident) => {
-        core::ptr::addr_of_mut!((*$pointer).$field)
-    };
-}
-
-pub(crate) use {addr_of_mut_field, impl_any};