Forráskód Böngészése

Merge branch 'master' into dev-drivers

greatbridf 7 hónapja
szülő
commit
3140316f9b
52 módosított fájl, 1641 hozzáadás és 403 törlés
  1. 11 0
      Cargo.lock
  2. 1 0
      Cargo.toml
  3. 18 5
      Makefile.src
  4. 2 0
      README.md
  5. 8 0
      configure
  6. 1 1
      crates/eonix_hal/src/arch/riscv64/config.rs
  7. 2 2
      crates/eonix_hal/src/arch/riscv64/memory.x
  8. 2 4
      crates/pointers/src/lib.rs
  9. 1 0
      crates/posix_types/src/lib.rs
  10. 11 0
      crates/posix_types/src/namei.rs
  11. 2 0
      crates/posix_types/src/result.rs
  12. 22 16
      crates/posix_types/src/stat.rs
  13. 1 1
      crates/posix_types/src/syscall_no/riscv64.rs
  14. 3 0
      src/driver.rs
  15. 7 12
      src/driver/e1000e.rs
  16. 1 1
      src/driver/e1000e/dev.rs
  17. 53 0
      src/driver/goldfish_rtc.rs
  18. 6 3
      src/driver/virtio.rs
  19. 304 0
      src/fs/ext4.rs
  20. 25 10
      src/fs/fat32.rs
  21. 1 1
      src/fs/fat32/dir.rs
  22. 1 0
      src/fs/mod.rs
  23. 13 6
      src/fs/procfs.rs
  24. 267 40
      src/fs/tmpfs.rs
  25. 1 0
      src/kernel.rs
  26. 21 13
      src/kernel/block.rs
  27. 5 4
      src/kernel/chardev.rs
  28. 2 2
      src/kernel/constants.rs
  29. 5 1
      src/kernel/mem/mm_list.rs
  30. 3 3
      src/kernel/mem/mm_list/page_fault.rs
  31. 33 0
      src/kernel/rtc/mod.rs
  32. 8 0
      src/kernel/syscall.rs
  33. 151 18
      src/kernel/syscall/file_rw.rs
  34. 36 40
      src/kernel/syscall/procops.rs
  35. 24 25
      src/kernel/syscall/sysinfo.rs
  36. 1 1
      src/kernel/task.rs
  37. 61 0
      src/kernel/task/futex.rs
  38. 6 3
      src/kernel/task/loader/elf.rs
  39. 7 6
      src/kernel/task/process_list.rs
  40. 16 2
      src/kernel/task/thread.rs
  41. 66 17
      src/kernel/timer.rs
  42. 16 20
      src/kernel/user/dataflow.rs
  43. 135 61
      src/kernel/vfs/dentry.rs
  44. 39 10
      src/kernel/vfs/dentry/dcache.rs
  45. 129 42
      src/kernel/vfs/file.rs
  46. 22 11
      src/kernel/vfs/filearray.rs
  47. 33 17
      src/kernel/vfs/inode.rs
  48. 3 2
      src/kernel/vfs/mount.rs
  49. 2 0
      src/lib.rs
  50. 35 3
      src/rcu.rs
  51. 9 0
      user-programs/init_script_riscv64.sh
  52. 9 0
      user-programs/init_script_x86_64.sh

+ 11 - 0
Cargo.lock

@@ -197,6 +197,7 @@ dependencies = [
  "eonix_preempt",
  "eonix_runtime",
  "eonix_sync",
+ "ext4_rs",
  "intrusive-collections",
  "intrusive_list",
  "itertools",
@@ -300,6 +301,16 @@ dependencies = [
  "intrusive-collections",
 ]
 
+[[package]]
+name = "ext4_rs"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a1a97344bde15b0ace15e265dab27228d4bdc37a0bfa8548c5645d7cfa6a144"
+dependencies = [
+ "bitflags 2.9.1",
+ "log",
+]
+
 [[package]]
 name = "fdt"
 version = "0.1.5"

+ 1 - 0
Cargo.toml

@@ -30,6 +30,7 @@ itertools = { version = "0.13.0", default-features = false }
 acpi = "5.2.0"
 align_ext = "0.1.0"
 xmas-elf = "0.10.0"
+ext4_rs = "1.3.2"
 smoltcp = { version = "0.12.0", default-features = false, features = [
     "alloc",
     "medium-ethernet",

+ 18 - 5
Makefile.src

@@ -7,6 +7,8 @@ QEMU ?= ##QEMU##
 GDB ?= ##GDB##
 FDISK ?= ##FDISK##
 
+IMG ?= ##IMAGE##
+
 COMMA := ,
 
 PROFILE = $(MODE)
@@ -46,17 +48,21 @@ BINARY_DIR := $(BINARY_DIR_BASE)/$(MODE)
 QEMU_ARGS += \
 	-machine virt -kernel $(BINARY_DIR)/eonix_kernel \
 	-device virtio-blk-device,drive=disk0,bus=virtio-mmio-bus.0 \
-	-device virtio-blk-device,drive=disk1,bus=virtio-mmio-bus.1 \
 	-device virtio-net-device,netdev=mynet0 \
 	-drive id=disk0,file=build/boot-riscv64.img,format=raw,if=none \
-	-drive id=disk1,file=build/fs-riscv64.img,format=raw,if=none \
 	-netdev user,id=mynet0 \
 	-rtc base=utc
 
+ifneq ($(IMG),)
+QEMU_ARGS += \
+	-drive id=disk1,file=$(IMG),format=raw,if=none \
+	-device virtio-blk-device,drive=disk1,bus=virtio-mmio-bus.1
+endif
+
 CARGO_FLAGS += --target riscv64gc-unknown-none-elf
 
 .PHONY: build
-build: $(BINARY_DIR)/eonix_kernel build/boot-riscv64.img build/fs-riscv64.img
+build: $(BINARY_DIR)/eonix_kernel build/boot-riscv64.img
 
 else ifeq ($(ARCH),x86_64)
 
@@ -65,11 +71,18 @@ BINARY_DIR := $(BINARY_DIR_BASE)/$(MODE)
 
 QEMU_ARGS += \
 	-machine q35 \
-	-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0 \
+	-device ahci,id=ahci \
+	-device ide-hd,drive=disk0,bus=ahci.0 \
 	-device e1000e,netdev=mynet0 \
-	-drive id=disk,file=build/boot-x86_64.img,format=raw,if=none \
+	-drive id=disk0,file=build/boot-x86_64.img,format=raw,if=none \
 	-netdev user,id=mynet0
 
+ifneq ($(IMG),)
+QEMU_ARGS += \
+	-drive id=disk1,file=$(IMG),format=raw,if=none \
+	-device ide-hd,drive=disk1,bus=ahci.1
+endif
+
 CARGO_FLAGS += --target x86_64-unknown-none.json
 
 .PHONY: build

+ 2 - 0
README.md

@@ -205,6 +205,7 @@ make tmux-debug
 - `QEMU`: 用于调试运行的 QEMU。默认使用 `qemu-system-$(ARCH)`。
 - `GDB`: 用于 `make debug` 的 GDB。我们将默认查找 `$(ARCH)-elf-gdb` 并检查支持的架构。
 - `FDISK`: 用于创建磁盘镜像分区表的 fdisk 可执行文件,要求使用来自 util-linux 版本的 fdisk。默认使用 `fdisk`。
+- `IMG`: 除启动磁盘以外,额外的磁盘镜像文件。默认不使用。
 
 在运行 make 时可以指定的额外选项:
 
@@ -217,3 +218,4 @@ make tmux-debug
 - `FDISK`: 手动指定 fdisk 路径。
 - `QEMU_ACCEL`: 手动指定要使用的 qemu 加速方法。
 - `DEBUG_TRAPS`: 是否要进行 trap 的调试,使 qemu 输出详细的 trap 日志。
+- `FEATURES`: 手动指定要编译的特性,使用逗号分隔。具体见 `Cargo.toml` 中的 `features` 字段。

+ 8 - 0
configure

@@ -112,9 +112,17 @@ else
     echo `which mkfs.fat`
 fi
 
+event "checking additional image"
+if [ "$IMG" = "" ]; then
+    echo "no"
+else
+    echo "$IMG"
+fi
+
 cp Makefile.src Makefile
 sed -i '' -e "s|##DEFAULT_ARCH##|$DEFAULT_ARCH|" Makefile > /dev/null 2>&1
 sed -i '' -e "s|##GDB##|$GDB|" Makefile > /dev/null 2>&1
 sed -i '' -e "s|##QEMU##|$QEMU|" Makefile > /dev/null 2>&1
 sed -i '' -e "s|##FDISK##|$FDISK|" Makefile > /dev/null 2>&1
+sed -i '' -e "s|##IMAGE##|$IMG|" Makefile > /dev/null 2>&1
 exit 0

+ 1 - 1
crates/eonix_hal/src/arch/riscv64/config.rs

@@ -47,5 +47,5 @@ pub mod platform {
 }
 
 pub mod time {
-    pub const INTERRUPTS_PER_SECOND: usize = 100;
+    pub const INTERRUPTS_PER_SECOND: usize = 1000;
 }

+ 2 - 2
crates/eonix_hal/src/arch/riscv64/memory.x

@@ -2,10 +2,10 @@ OUTPUT_ARCH(riscv)
 ENTRY(_start)
 
 MEMORY {
-    RAM    : org = 0x0000000080200000, len = 4M
+    RAM    : org = 0x0000000080200000, len = 8M
     VDSO   : org = 0x00007f0000000000, len = 4K
     KBSS   : org = 0xffffffff40000000, len = 2M
-    KIMAGE : org = 0xffffffff80200000, len = 2M
+    KIMAGE : org = 0xffffffff80200000, len = 8M
 }
 
 REGION_ALIAS("REGION_TEXT", KIMAGE);

+ 2 - 4
crates/pointers/src/lib.rs

@@ -42,11 +42,9 @@ impl<'a, T: ?Sized> BorrowedArc<'a, T> {
         }
     }
 
-    #[allow(dead_code)]
-    pub fn new(ptr: &'a *const T) -> Self {
-        assert!(!ptr.is_null());
+    pub fn new(ptr: &'a Arc<T>) -> Self {
         Self {
-            arc: ManuallyDrop::new(unsafe { Arc::from_raw(*ptr) }),
+            arc: ManuallyDrop::new(unsafe { core::mem::transmute_copy(ptr) }),
             _phantom: PhantomData,
         }
     }

+ 1 - 0
crates/posix_types/src/lib.rs

@@ -2,6 +2,7 @@
 
 pub mod constants;
 pub mod ctypes;
+pub mod namei;
 pub mod open;
 pub mod result;
 pub mod signal;

+ 11 - 0
crates/posix_types/src/namei.rs

@@ -0,0 +1,11 @@
+use bitflags::bitflags;
+
+bitflags! {
+    #[derive(Debug, Clone, Copy)]
+    pub struct RenameFlags: u32 {
+        /// Do not overwrite existing files
+        const RENAME_NOREPLACE = 0x1;
+        /// Exchange the names of two files
+        const RENAME_EXCHANGE = 0x2;
+    }
+}

+ 2 - 0
crates/posix_types/src/result.rs

@@ -1,5 +1,6 @@
 pub enum PosixError {
     EFAULT = 14,
+    EXDEV = 18,
     EINVAL = 22,
 }
 
@@ -7,6 +8,7 @@ impl From<PosixError> for u32 {
     fn from(error: PosixError) -> Self {
         match error {
             PosixError::EFAULT => 14,
+            PosixError::EXDEV => 18,
             PosixError::EINVAL => 22,
         }
     }

+ 22 - 16
crates/posix_types/src/stat.rs

@@ -1,9 +1,8 @@
 #[repr(C)]
 #[derive(Debug, Default, Copy, Clone)]
 pub struct StatXTimestamp {
-    pub tv_sec: i64,
+    pub tv_sec: u64,
     pub tv_nsec: u32,
-    pub __reserved: i32,
 }
 
 #[repr(C)]
@@ -37,7 +36,14 @@ pub struct StatX {
 #[derive(Debug, Default, Copy, Clone)]
 pub struct TimeSpec {
     pub tv_sec: u64,
-    pub tv_nsec: u64,
+    pub tv_nsec: u32,
+}
+
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct TimeVal {
+    pub tv_sec: u64,
+    pub tv_usec: u32,
 }
 
 #[repr(C)]
@@ -46,14 +52,14 @@ pub struct Stat {
     pub st_dev: u64,
     pub st_ino: u64,
     pub st_mode: u32,
-    pub st_nlink: usize,
+    pub st_nlink: u32,
     pub st_uid: u32,
     pub st_gid: u32,
     pub st_rdev: u64,
-    __padding: usize,
+    __padding: u64,
 
     pub st_size: u64,
-    pub st_blksize: usize,
+    pub st_blksize: u32,
     __padding2: u32,
 
     pub st_blocks: u64,
@@ -65,31 +71,31 @@ pub struct Stat {
 impl From<StatX> for Stat {
     fn from(statx: StatX) -> Self {
         Self {
-            st_dev: statx.stx_dev_major as u64 | ((statx.stx_dev_minor as u64) << 32),
+            st_dev: statx.stx_dev_minor as u64 | ((statx.stx_dev_major as u64) << 8),
             st_ino: statx.stx_ino,
             st_mode: statx.stx_mode as u32,
-            st_nlink: statx.stx_nlink as usize,
+            st_nlink: statx.stx_nlink,
             st_uid: statx.stx_uid,
             st_gid: statx.stx_gid,
-            st_rdev: statx.stx_rdev_major as u64 | ((statx.stx_rdev_minor as u64) << 32),
+            st_rdev: statx.stx_rdev_minor as u64 | ((statx.stx_rdev_major as u64) << 8),
             __padding: 0,
 
             st_size: statx.stx_size,
-            st_blksize: statx.stx_blksize as usize,
+            st_blksize: statx.stx_blksize,
             __padding2: 0,
 
             st_blocks: statx.stx_blocks,
             st_atime: TimeSpec {
-                tv_sec: statx.stx_atime.tv_sec as u64,
-                tv_nsec: statx.stx_atime.tv_nsec as u64,
+                tv_sec: statx.stx_atime.tv_sec,
+                tv_nsec: statx.stx_atime.tv_nsec,
             },
             st_mtime: TimeSpec {
-                tv_sec: statx.stx_mtime.tv_sec as u64,
-                tv_nsec: statx.stx_mtime.tv_nsec as u64,
+                tv_sec: statx.stx_mtime.tv_sec,
+                tv_nsec: statx.stx_mtime.tv_nsec,
             },
             st_ctime: TimeSpec {
-                tv_sec: statx.stx_ctime.tv_sec as u64,
-                tv_nsec: statx.stx_ctime.tv_nsec as u64,
+                tv_sec: statx.stx_ctime.tv_sec,
+                tv_nsec: statx.stx_ctime.tv_nsec,
             },
         }
     }

+ 1 - 1
crates/posix_types/src/syscall_no/riscv64.rs

@@ -88,7 +88,7 @@ pub const SYS_SYNC_FILE_RANGE: usize = 84;
 pub const SYS_TIMERFD_CREATE: usize = 85;
 pub const SYS_TIMERFD_SETTIME: usize = 411;
 pub const SYS_TIMERFD_GETTIME: usize = 410;
-pub const SYS_UTIMENSAT: usize = 412;
+pub const SYS_UTIMENSAT: usize = 88;
 pub const SYS_ACCT: usize = 89;
 pub const SYS_CAPGET: usize = 90;
 pub const SYS_CAPSET: usize = 91;

+ 3 - 0
src/driver.rs

@@ -7,3 +7,6 @@ pub mod virtio;
 
 #[cfg(target_arch = "riscv64")]
 pub mod sbi_console;
+
+#[cfg(target_arch = "riscv64")]
+pub mod goldfish_rtc;

+ 7 - 12
src/driver/e1000e.rs

@@ -1,3 +1,9 @@
+mod defs;
+mod dev;
+mod error;
+mod rx_desc;
+mod tx_desc;
+
 use crate::kernel::constants::{EAGAIN, EFAULT, EINVAL, EIO};
 use crate::kernel::interrupt::register_irq_handler;
 use crate::kernel::mem::paging::{self, AllocZeroed};
@@ -9,23 +15,12 @@ use alloc::boxed::Box;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
 use core::ptr::NonNull;
+use dev::E1000eDev;
 use eonix_hal::fence::memory_barrier;
 use eonix_mm::address::{Addr, PAddr};
 use eonix_sync::SpinIrq;
 use paging::Page;
 
-mod defs;
-mod dev;
-mod error;
-mod rx_desc;
-mod tx_desc;
-
-use crate::kernel::constants::{EINVAL, EIO};
-use crate::kernel::pcie::{self, Header, PCIDevice, PCIDriver, PciError};
-use alloc::sync::Arc;
-use dev::E1000eDev;
-use eonix_mm::address::PAddr;
-
 struct Driver {
     dev_id: u16,
 }

+ 1 - 1
src/driver/e1000e/dev.rs

@@ -5,11 +5,11 @@ use super::tx_desc::TxDescriptorTable;
 use crate::net::netdev::{NetDevice, PhyDevice};
 use crate::net::{LinkSpeed, LinkState, LinkStatus, NetBuffer, NetError};
 use crate::prelude::*;
-use crate::sync::fence::memory_barrier;
 use crate::{kernel::interrupt::register_irq_handler, net::Mac};
 use alloc::sync::Arc;
 use core::ops::DerefMut;
 use core::ptr::NonNull;
+use eonix_hal::fence::memory_barrier;
 use eonix_hal::mm::ArchPhysAccess;
 use eonix_mm::address::{Addr, PAddr, PRange, PhysAccess};
 use eonix_sync::{SpinIrq, WaitList};

+ 53 - 0
src/driver/goldfish_rtc.rs

@@ -0,0 +1,53 @@
+use crate::kernel::{
+    rtc::{register_rtc, RealTimeClock},
+    timer::Instant,
+};
+use core::ptr::NonNull;
+use eonix_hal::{arch_exported::fdt::FDT, mm::ArchPhysAccess};
+use eonix_log::println_warn;
+use eonix_mm::address::{PAddr, PhysAccess};
+
+#[cfg(not(target_arch = "riscv64"))]
+compile_error!("Goldfish RTC driver is only supported on RISC-V architecture");
+
+struct GoldfishRtc {
+    time_low: NonNull<u32>,
+    time_high: NonNull<u32>,
+}
+
+unsafe impl Send for GoldfishRtc {}
+unsafe impl Sync for GoldfishRtc {}
+
+impl RealTimeClock for GoldfishRtc {
+    /// Read the current time from the Goldfish RTC.
+    ///
+    /// The two 32-bit registers are combined into a single 64-bit nanosecond
+    /// count, which is then split into whole seconds and the nanosecond
+    /// remainder for `Instant::new`.
+    fn now(&self) -> Instant {
+        // The Goldfish RTC latches the upper 32 bits at the moment `TIME_LOW`
+        // is read, so `TIME_LOW` must be read *before* `TIME_HIGH`. Reading
+        // them in the opposite order pairs a stale high word (0 on the very
+        // first access) with a fresh low word.
+        //
+        // SAFETY: The pointers are guaranteed to be valid as long as the RTC is registered.
+        let time_low = unsafe { self.time_low.read_volatile() };
+        let time_high = unsafe { self.time_high.read_volatile() };
+
+        let nsecs = ((time_high as u64) << 32) | (time_low as u64);
+        let secs_since_epoch = nsecs / 1_000_000_000;
+        let nsecs_within = nsecs % 1_000_000_000;
+
+        Instant::new(secs_since_epoch, nsecs_within as u32)
+    }
+}
+
+pub fn probe() {
+    let Some(rtc) = FDT.find_compatible(&["google,goldfish-rtc"]) else {
+        println_warn!("Goldfish RTC not found in FDT");
+        return;
+    };
+
+    let mut regs = rtc.reg().expect("Goldfish RTC reg not found");
+    let base = regs
+        .next()
+        .map(|r| PAddr::from(r.starting_address as usize))
+        .expect("Goldfish RTC base address not found");
+
+    let goldfish_rtc = GoldfishRtc {
+        time_low: unsafe { ArchPhysAccess::as_ptr(base) },
+        time_high: unsafe { ArchPhysAccess::as_ptr(base + 4) },
+    };
+
+    register_rtc(goldfish_rtc);
+}

+ 6 - 3
src/driver/virtio.rs

@@ -7,7 +7,7 @@ use crate::kernel::{
     block::{make_device, BlockDevice},
     mem::{AsMemoryBlock, MemoryBlock, Page},
 };
-use alloc::sync::Arc;
+use alloc::{sync::Arc, vec::Vec};
 use core::num::NonZero;
 use eonix_hal::{arch_exported::fdt::FDT, mm::ArchPhysAccess};
 use eonix_log::{println_info, println_warn};
@@ -84,7 +84,7 @@ unsafe impl Hal for HAL {
 
 pub fn init_virtio_devices() {
     let mut disk_id = 0;
-    for reg in FDT
+    let mut virtio_devices: Vec<_> = FDT
         .all_nodes()
         .filter(|node| {
             node.compatible()
@@ -92,7 +92,10 @@ pub fn init_virtio_devices() {
         })
         .filter_map(|node| node.reg())
         .flatten()
-    {
+        .collect();
+    virtio_devices.sort_by_key(|reg| reg.starting_address);
+
+    for reg in virtio_devices {
         let base = PAddr::from(reg.starting_address as usize);
         let size = reg.size.expect("Virtio device must have a size");
 

+ 304 - 0
src/fs/ext4.rs

@@ -0,0 +1,304 @@
+use core::sync::atomic::{AtomicU32, AtomicU64};
+
+use crate::{
+    io::{Buffer, ByteBuffer},
+    kernel::{
+        block::BlockDevice,
+        constants::EIO,
+        timer::Instant,
+        vfs::{
+            dentry::Dentry,
+            inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData},
+            mount::{register_filesystem, Mount, MountCreator},
+            s_isdir, s_isreg,
+            vfs::Vfs,
+            DevId, FsContext, TimeSpec,
+        },
+    },
+    path::Path,
+    prelude::*,
+};
+use alloc::{
+    collections::btree_map::{BTreeMap, Entry},
+    sync::Arc,
+};
+use eonix_runtime::task::Task;
+use eonix_sync::RwLock;
+use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error};
+use ext4_rs::{Errno, Ext4};
+
+pub struct Ext4BlockDevice {
+    device: Arc<BlockDevice>,
+}
+
+impl Ext4BlockDevice {
+    pub fn new(device: Arc<BlockDevice>) -> Self {
+        Self { device }
+    }
+}
+
+impl Ext4BlockDeviceTrait for Ext4BlockDevice {
+    fn read_offset(&self, offset: usize) -> Vec<u8> {
+        let mut buffer = vec![0u8; 4096];
+        let mut byte_buffer = ByteBuffer::new(buffer.as_mut_slice());
+
+        let _ = self
+            .device
+            .read_some(offset, &mut byte_buffer)
+            .expect("Failed to read from block device");
+
+        buffer
+    }
+
+    fn write_offset(&self, _offset: usize, _data: &[u8]) {
+        todo!()
+    }
+}
+
+impl_any!(Ext4Fs);
+struct Ext4Fs {
+    inner: Ext4,
+    device: Arc<BlockDevice>,
+    icache: RwLock<BTreeMap<Ino, Ext4Inode>>,
+}
+
+impl Vfs for Ext4Fs {
+    fn io_blksize(&self) -> usize {
+        4096
+    }
+
+    fn fs_devid(&self) -> DevId {
+        self.device.devid()
+    }
+
+    fn is_read_only(&self) -> bool {
+        true
+    }
+}
+
+impl Ext4Fs {
+    /// Return the cached inode for `ino` from an already-locked cache, if any.
+    fn try_get(&self, icache: &BTreeMap<Ino, Ext4Inode>, ino: u64) -> Option<Arc<dyn Inode>> {
+        let cached = icache.get(&ino)?;
+        Some(cached.clone().into_inner())
+    }
+
+    /// Return the cached inode for `idata.ino`, creating and caching a new one
+    /// from `idata` when the cache has no entry yet.
+    ///
+    /// Directories become `DirInode`s; everything else becomes a `FileInode`,
+    /// with a warning printed for modes that are neither regular files nor
+    /// directories.
+    fn get_or_insert(
+        &self,
+        icache: &mut BTreeMap<Ino, Ext4Inode>,
+        mut idata: InodeData,
+    ) -> Arc<dyn Inode> {
+        // Cache hit: hand out the existing inode and drop `idata`.
+        let slot = match icache.entry(idata.ino) {
+            Entry::Occupied(hit) => return hit.get().clone().into_inner(),
+            Entry::Vacant(slot) => slot,
+        };
+
+        let mode = *idata.mode.get_mut();
+        let is_regular = s_isreg(mode);
+
+        let fresh = if !is_regular && s_isdir(mode) {
+            Ext4Inode::Dir(Arc::new(DirInode { idata }))
+        } else {
+            if !is_regular {
+                // Unknown types are still exposed, as plain files.
+                println_warn!("ext4: Unsupported inode type: {mode:#o}");
+            }
+            Ext4Inode::File(Arc::new(FileInode { idata }))
+        };
+
+        slot.insert(fresh).clone().into_inner()
+    }
+}
+
+impl Ext4Fs {
+    /// Open an ext4 filesystem on `device` and build its root inode.
+    ///
+    /// Returns the filesystem together with the inode created from on-disk
+    /// inode number 2, which is also inserted into the inode cache.
+    pub fn create(device: Arc<BlockDevice>) -> KResult<(Arc<Self>, Arc<dyn Inode>)> {
+        let ext4_device = Ext4BlockDevice::new(device.clone());
+        let ext4 = Ext4::open(Arc::new(ext4_device));
+
+        let ext4fs = Arc::new(Self {
+            inner: ext4,
+            device,
+            icache: RwLock::new(BTreeMap::new()),
+        });
+
+        let root_inode = {
+            let mut icache = Task::block_on(ext4fs.icache.write());
+            let root_inode = ext4fs.inner.get_inode_ref(2);
+
+            // In the ext4 on-disk format the `i_*time_extra` fields pack two
+            // epoch-extension bits into bits 0..=1 and the nanoseconds into
+            // the upper 30 bits, so the raw value must be shifted right by 2
+            // before it is used as a nanosecond count. The epoch-extension
+            // bits are ignored here, as before.
+            // NOTE(review): assumes ext4_rs returns the raw on-disk field - confirm.
+            let atime_nsec = (root_inode.inode.i_atime_extra() as u32) >> 2;
+            let ctime_nsec = (root_inode.inode.i_ctime_extra() as u32) >> 2;
+            let mtime_nsec = (root_inode.inode.i_mtime_extra() as u32) >> 2;
+
+            ext4fs.get_or_insert(
+                &mut icache,
+                InodeData {
+                    ino: root_inode.inode_num as Ino,
+                    size: AtomicU64::new(root_inode.inode.size()),
+                    nlink: AtomicNlink::new(root_inode.inode.links_count() as _),
+                    uid: AtomicU32::new(root_inode.inode.uid() as _),
+                    gid: AtomicU32::new(root_inode.inode.gid() as _),
+                    mode: AtomicU32::new(root_inode.inode.mode() as _),
+                    atime: Spin::new(Instant::new(
+                        root_inode.inode.atime() as _,
+                        atime_nsec,
+                    )),
+                    ctime: Spin::new(Instant::new(
+                        root_inode.inode.ctime() as _,
+                        ctime_nsec,
+                    )),
+                    mtime: Spin::new(Instant::new(
+                        root_inode.inode.mtime() as _,
+                        mtime_nsec,
+                    )),
+                    rwsem: RwLock::new(()),
+                    vfs: Arc::downgrade(&ext4fs) as _,
+                },
+            )
+        };
+
+        Ok((ext4fs, root_inode))
+    }
+}
+
+#[derive(Clone)]
+enum Ext4Inode {
+    File(Arc<FileInode>),
+    Dir(Arc<DirInode>),
+}
+
+impl Ext4Inode {
+    fn into_inner(self) -> Arc<dyn Inode> {
+        match self {
+            Ext4Inode::File(inode) => inode,
+            Ext4Inode::Dir(inode) => inode,
+        }
+    }
+}
+
+define_struct_inode! {
+    struct FileInode;
+}
+
+define_struct_inode! {
+    struct DirInode;
+}
+
+impl Inode for FileInode {
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let mut temp_buf = vec![0u8; buffer.total()];
+        match ext4fs.inner.read_at(self.ino as u32, offset, &mut temp_buf) {
+            Ok(bytes_read) => {
+                let _ = buffer.fill(&temp_buf[..bytes_read])?;
+                Ok(buffer.wrote())
+            }
+            Err(e) => Err(e.error() as u32),
+        }
+    }
+}
+
+impl Inode for DirInode {
+    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let name = dentry.get_name();
+        let name = String::from_utf8_lossy(&name);
+        let lookup_result = ext4fs.inner.fuse_lookup(self.ino, &name);
+
+        const EXT4_ERROR_ENOENT: Ext4Error = Ext4Error::new(Errno::ENOENT);
+        let attr = match lookup_result {
+            Ok(attr) => attr,
+            Err(EXT4_ERROR_ENOENT) => return Ok(None),
+            Err(error) => return Err(error.error() as u32),
+        };
+
+        // Fast path: if the inode is already in the cache, return it.
+        if let Some(inode) = ext4fs.try_get(&Task::block_on(ext4fs.icache.read()), attr.ino as u64)
+        {
+            return Ok(Some(inode));
+        }
+
+        let extra_perm = attr.perm.bits() as u32 & 0o7000;
+        let perm = attr.perm.bits() as u32 & 0o0700;
+        let real_perm = extra_perm | perm | perm >> 3 | perm >> 6;
+
+        // Create a new inode based on the attributes.
+        let mut icache = Task::block_on(ext4fs.icache.write());
+        let inode = ext4fs.get_or_insert(
+            &mut icache,
+            InodeData {
+                ino: attr.ino as Ino,
+                size: AtomicU64::new(attr.size),
+                nlink: AtomicNlink::new(attr.nlink as _),
+                uid: AtomicU32::new(attr.uid),
+                gid: AtomicU32::new(attr.gid),
+                mode: AtomicU32::new(attr.kind.bits() as u32 | real_perm),
+                atime: Spin::new(Instant::new(attr.atime as _, 0)),
+                ctime: Spin::new(Instant::new(attr.ctime as _, 0)),
+                mtime: Spin::new(Instant::new(attr.mtime as _, 0)),
+                rwsem: RwLock::new(()),
+                vfs: self.vfs.clone(),
+            },
+        );
+
+        Ok(Some(inode))
+    }
+
+    fn do_readdir(
+        &self,
+        offset: usize,
+        callback: &mut dyn FnMut(&[u8], Ino) -> KResult<core::ops::ControlFlow<(), ()>>,
+    ) -> KResult<usize> {
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let entries = ext4fs
+            .inner
+            .fuse_readdir(self.ino as u64, 0, offset as i64)
+            .map_err(|err| err.error() as u32)?;
+        let mut current_offset = 0;
+
+        for entry in entries {
+            let name_len = entry.name_len as usize;
+            let name = &entry.name[..name_len];
+
+            if callback(name, entry.inode as Ino)?.is_break() {
+                break;
+            }
+
+            current_offset += 1;
+        }
+        Ok(current_offset)
+    }
+}
+
+struct Ext4MountCreator;
+
+impl MountCreator for Ext4MountCreator {
+    fn check_signature(&self, mut first_block: &[u8]) -> KResult<bool> {
+        match first_block.split_off(1080..) {
+            Some([0x53, 0xef, ..]) => Ok(true), // Superblock signature
+            Some(..) => Ok(false),
+            None => Err(EIO),
+        }
+    }
+
+    fn create_mount(&self, source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
+        let source = source.as_bytes();
+
+        let path = Path::new(source)?;
+        let device_dentry =
+            Dentry::open_recursive(&FsContext::global(), Dentry::root(), path, true, 0)?;
+        let devid = device_dentry.get_inode()?.devid()?;
+        let device = BlockDevice::get(devid)?;
+
+        let (ext4fs, root_inode) = Ext4Fs::create(device)?;
+
+        Mount::new(mp, ext4fs, root_inode)
+    }
+}
+
+pub fn init() {
+    register_filesystem("ext4", Arc::new(Ext4MountCreator)).unwrap();
+}

+ 25 - 10
src/fs/fat32.rs

@@ -174,7 +174,12 @@ impl FatFs {
 
         let root_dir_cluster_count = ClusterIterator::new(fat, fatfs.rootdir_cluster).count();
         let root_dir_size = root_dir_cluster_count as u32 * info.sectors_per_cluster as u32 * 512;
-        let root_inode = DirInode::new(info.root_cluster as Ino, fatfs.weak.clone(), root_dir_size);
+
+        let root_inode = DirInode::new(
+            (info.root_cluster & !0xF000_0000) as Ino,
+            fatfs.weak.clone(),
+            root_dir_size,
+        );
 
         Ok((fatfs_arc, root_inode))
     }
@@ -195,14 +200,16 @@ impl<'fat> Iterator for ClusterIterator<'fat> {
     type Item = ClusterNo;
 
     fn next(&mut self) -> Option<Self::Item> {
-        const EOC: ClusterNo = 0x0FFFFFF8;
-        let next = self.cur;
-
-        if next >= EOC {
-            None
-        } else {
-            self.cur = self.fat[next as usize];
-            Some(next)
+        const EOC: ClusterNo = 0x0FFF_FFF8;
+        const INVL: ClusterNo = 0xF000_0000;
+
+        match self.cur {
+            ..2 | EOC..INVL => None,
+            INVL.. => unreachable!("Invalid cluster number: {}", self.cur),
+            next => {
+                self.cur = self.fat[next as usize] & !INVL;
+                Some(next)
+            }
         }
     }
 }
@@ -296,7 +303,7 @@ impl Inode for DirInode {
         let entry = entries.find(|entry| {
             entry
                 .as_ref()
-                .map(|entry| &entry.filename == dentry.name())
+                .map(|entry| &entry.filename == &***dentry.name())
                 .unwrap_or(true)
         });
 
@@ -343,6 +350,14 @@ impl Inode for DirInode {
 struct FatMountCreator;
 
 impl MountCreator for FatMountCreator {
+    fn check_signature(&self, mut first_block: &[u8]) -> KResult<bool> {
+        match first_block.split_off(82..) {
+            Some([b'F', b'A', b'T', b'3', b'2', b' ', b' ', b' ', ..]) => Ok(true),
+            Some(..) => Ok(false),
+            None => Err(EIO),
+        }
+    }
+
     fn create_mount(&self, _source: &str, _flags: u64, mp: &Arc<Dentry>) -> KResult<Mount> {
         let (fatfs, root_inode) = FatFs::create(make_device(8, 1))?;
 

+ 1 - 1
src/fs/fat32/dir.rs

@@ -232,7 +232,7 @@ where
             size: entry.size,
             entry_offset,
             filename,
-            cluster: entry.cluster_low as u32 | ((entry.cluster_high as u32) << 16),
+            cluster: entry.cluster_low as u32 | (((entry.cluster_high & !0xF000) as u32) << 16),
             is_directory: entry.is_directory(),
         }))
     }

+ 1 - 0
src/fs/mod.rs

@@ -1,3 +1,4 @@
 pub mod fat32;
 pub mod procfs;
 pub mod tmpfs;
+pub mod ext4;

+ 13 - 6
src/fs/procfs.rs

@@ -1,4 +1,5 @@
 use crate::kernel::constants::{EACCES, ENOTDIR};
+use crate::kernel::timer::Instant;
 use crate::{
     io::Buffer,
     kernel::{
@@ -76,13 +77,16 @@ impl FileInode {
             mode |= 0o200;
         }
 
-        let inode = Self {
+        let mut inode = Self {
             idata: InodeData::new(ino, vfs),
             file,
         };
 
         inode.idata.mode.store(mode, Ordering::Relaxed);
         inode.idata.nlink.store(1, Ordering::Relaxed);
+        *inode.ctime.get_mut() = Instant::now();
+        *inode.mtime.get_mut() = Instant::now();
+        *inode.atime.get_mut() = Instant::now();
 
         Arc::new(inode)
     }
@@ -121,6 +125,9 @@ impl DirInode {
             addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem));
             addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | 0o755).into());
             addr_of_mut_field!(&mut *inode, nlink).write(1.into());
+            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
         })
     }
 }
@@ -132,11 +139,7 @@ impl Inode for DirInode {
             .entries
             .access(lock.prove())
             .iter()
-            .find_map(|(name, node)| {
-                name.as_ref()
-                    .eq(dentry.name().as_ref())
-                    .then(|| node.unwrap())
-            }))
+            .find_map(|(name, node)| (name == &***dentry.name()).then(|| node.unwrap())))
     }
 
     fn do_readdir(
@@ -202,6 +205,10 @@ impl MountCreator for ProcFsMountCreator {
         let root_inode = vfs.root_node.clone();
         Mount::new(mp, vfs, root_inode)
     }
+
+    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
+        Ok(true)
+    }
 }
 
 pub fn root() -> ProcFsNode {

+ 267 - 40
src/fs/tmpfs.rs

@@ -1,5 +1,7 @@
 use crate::io::Stream;
-use crate::kernel::constants::{EINVAL, EIO, EISDIR};
+use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR};
+use crate::kernel::timer::Instant;
+use crate::kernel::vfs::inode::RenameData;
 use crate::{
     io::Buffer,
     kernel::constants::{S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG},
@@ -16,7 +18,7 @@ use crate::{
 use alloc::sync::{Arc, Weak};
 use core::{ops::ControlFlow, sync::atomic::Ordering};
 use eonix_runtime::task::Task;
-use eonix_sync::{AsProof as _, AsProofMut as _, Locked, ProofMut};
+use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut};
 use itertools::Itertools;
 
 fn acquire(vfs: &Weak<dyn Vfs>) -> KResult<Arc<dyn Vfs>> {
@@ -42,6 +44,9 @@ impl NodeInode {
 
             addr_of_mut_field!(&mut *inode, mode).write(mode.into());
             addr_of_mut_field!(&mut *inode, nlink).write(1.into());
+            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
         })
     }
 }
@@ -67,19 +72,60 @@ impl DirectoryInode {
             addr_of_mut_field!(&mut *inode, size).write(1.into());
             addr_of_mut_field!(&mut *inode, mode).write((S_IFDIR | (mode & 0o777)).into());
             addr_of_mut_field!(&mut *inode, nlink).write(1.into()); // link from `.` to itself
+            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
         })
     }
 
     fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: ProofMut<'_, ()>) {
+        // Adds the entry `(name, file.ino)` to this directory: bumps `file`'s
+        // `nlink` and this directory's `size`, and stamps this directory's
+        // `ctime` and `mtime` with the same `now`.
+        let now = Instant::now();
+
         // SAFETY: Only `unlink` will do something based on `nlink` count
         //         No need to synchronize here
         file.nlink.fetch_add(1, Ordering::Relaxed);
+        // NOTE(review): this stamps the directory's (`self`) ctime, whereas
+        // `do_unlink` stamps `file.ctime` after changing `nlink` -- confirm
+        // which inode is intended here.
+        *self.ctime.lock() = now;
 
         // SAFETY: `rwsem` has done the synchronization
         self.size.fetch_add(1, Ordering::Relaxed);
+        *self.mtime.lock() = now;
 
         self.entries.access_mut(dlock).push((name, file.ino));
     }
+
+    // Removes the `(filename, file.ino)` entry from `entries` and drops one
+    // link from `file`.
+    //
+    // Fails with `EISDIR`, before anything is modified, when `file` has the
+    // `S_IFDIR` bit set. This directory's `size` is decremented only when
+    // `decrease_size` is set. `now` is written to this directory's `mtime`
+    // and to `file`'s `ctime`.
+    fn do_unlink(
+        &self,
+        file: &Arc<dyn Inode>,
+        filename: &[u8],
+        entries: &mut Vec<(Arc<[u8]>, Ino)>,
+        now: Instant,
+        decrease_size: bool,
+        _dir_lock: ProofMut<()>,
+        _file_lock: ProofMut<()>,
+    ) -> KResult<()> {
+        // SAFETY: `file_lock` has done the synchronization
+        let is_directory = file.mode.load(Ordering::Relaxed) & S_IFDIR != 0;
+        if is_directory {
+            return Err(EISDIR);
+        }
+
+        // Keep every entry except the one matching both inode number and name.
+        entries.retain(|(entry_name, entry_ino)| {
+            !(*entry_ino == file.ino && entry_name.as_ref() == filename)
+        });
+
+        if decrease_size {
+            // SAFETY: `dir_lock` has done the synchronization
+            self.size.fetch_sub(1, Ordering::Relaxed);
+        }
+        *self.mtime.lock() = now;
+
+        // The last reference to the inode is held by some dentry
+        // and will be released when the dentry is released
+
+        // SAFETY: `file_lock` has done the synchronization
+        file.nlink.fetch_sub(1, Ordering::Relaxed);
+        *file.ctime.lock() = now;
+
+        Ok(())
+    }
 }
 
 impl Inode for DirectoryInode {
@@ -108,7 +154,7 @@ impl Inode for DirectoryInode {
         let ino = vfs.assign_ino();
         let file = FileInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.prove_mut());
+        self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
         at.save_reg(file)
     }
 
@@ -130,7 +176,7 @@ impl Inode for DirectoryInode {
             dev,
         );
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.prove_mut());
+        self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
         at.save_reg(file)
     }
 
@@ -143,7 +189,7 @@ impl Inode for DirectoryInode {
         let ino = vfs.assign_ino();
         let file = SymlinkInode::new(ino, self.vfs.clone(), target.into());
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.prove_mut());
+        self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
         at.save_symlink(file)
     }
 
@@ -156,49 +202,32 @@ impl Inode for DirectoryInode {
         let ino = vfs.assign_ino();
         let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(at.name().clone(), newdir.as_ref(), rwsem.prove_mut());
+        self.link(at.get_name(), newdir.as_ref(), rwsem.prove_mut());
         at.save_dir(newdir)
     }
 
     fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
         let _vfs = acquire(&self.vfs)?;
 
-        let dlock = Task::block_on(self.rwsem.write());
+        let dir_lock = Task::block_on(self.rwsem.write());
 
         let file = at.get_inode()?;
-        let _flock = file.rwsem.write();
-
-        // SAFETY: `flock` has done the synchronization
-        if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 {
-            return Err(EISDIR);
-        }
-
-        let entries = self.entries.access_mut(dlock.prove_mut());
-        entries.retain(|(_, ino)| *ino != file.ino);
-
-        assert_eq!(
-            entries.len() as u64,
-            // SAFETY: `dlock` has done the synchronization
-            self.size.fetch_sub(1, Ordering::Relaxed) - 1
-        );
-
-        // SAFETY: `flock` has done the synchronization
-        let file_nlink = file.nlink.fetch_sub(1, Ordering::Relaxed) - 1;
-
-        if file_nlink == 0 {
-            // Remove the file inode from the inode cache
-            // The last reference to the inode is held by some dentry
-            // and will be released when the dentry is released
-            //
-            // TODO: Should we use some inode cache in tmpfs?
-            //
-            // vfs.icache.lock().retain(|ino, _| *ino != file.ino);
-        }
-
-        // Postpone the invalidation of the dentry and inode until the
-        // last reference to the dentry is released
-        //
-        // But we can remove it from the dentry cache immediately
+        let filename = at.get_name();
+        let file_lock = Task::block_on(file.rwsem.write());
+
+        let entries = self.entries.access_mut(dir_lock.prove_mut());
+
+        self.do_unlink(
+            &file,
+            &filename,
+            entries,
+            Instant::now(),
+            true,
+            dir_lock.prove_mut(),
+            file_lock.prove_mut(),
+        )?;
+
+        // Remove the dentry from the dentry cache immediately
         // so later lookup will fail with ENOENT
         dcache::d_remove(at);
 
@@ -213,6 +242,186 @@ impl Inode for DirectoryInode {
         let old = self.mode.load(Ordering::Relaxed);
         self.mode
             .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed);
+        *self.ctime.lock() = Instant::now();
+
+        Ok(())
+    }
+
+    // Renames `old_dentry` (an entry of this directory) to `new_dentry`.
+    //
+    // The whole operation runs under the filesystem-wide `rename_lock`.
+    // Two cases are handled:
+    //   * same directory: the old entry is renamed in place and, if the
+    //     target name already exists, that entry is removed and its inode
+    //     loses one link;
+    //   * different directories: the old entry is removed from this
+    //     directory and pushed into the new parent, unlinking an existing
+    //     target through `do_unlink`.
+    //
+    // Errors:
+    //   * `ENOSYS`  - `is_exchange` is set (not supported);
+    //   * `EEXIST`  - `no_replace` is set and the target already has an inode;
+    //   * `ENOENT`  - the old entry is not found in this directory;
+    //   * `EISDIR`  - the existing target is a directory;
+    //   * `ENOTDIR` - the source is a directory but the existing target is not.
+    //
+    // Panics if `vfs` is not a `TmpFs` or the new parent is not a
+    // `DirectoryInode`.
+    fn rename(&self, rename_data: RenameData) -> KResult<()> {
+        let RenameData {
+            old_dentry,
+            new_dentry,
+            new_parent,
+            is_exchange,
+            no_replace,
+            vfs,
+        } = rename_data;
+
+        if is_exchange {
+            println_warn!("TmpFs does not support exchange rename for now");
+            return Err(ENOSYS);
+        }
+
+        let vfs = vfs
+            .as_any()
+            .downcast_ref::<TmpFs>()
+            .expect("vfs must be a TmpFs");
+
+        let _rename_lock = Task::block_on(vfs.rename_lock.lock());
+
+        let old_file = old_dentry.get_inode()?;
+        let new_file = new_dentry.get_inode();
+
+        if no_replace && new_file.is_ok() {
+            return Err(EEXIST);
+        }
+
+        let same_parent = Arc::as_ptr(&new_parent) == &raw const *self;
+        if same_parent {
+            // Same directory rename
+            // Remove from old location and add to new location
+            let parent_lock = Task::block_on(self.rwsem.write());
+            let entries = self.entries.access_mut(parent_lock.prove_mut());
+
+            fn rename_old(
+                old_entry: &mut (Arc<[u8]>, Ino),
+                old_file: &Arc<dyn Inode + 'static>,
+                new_dentry: &Arc<Dentry>,
+                now: Instant,
+            ) {
+                let (name, _) = old_entry;
+                *name = new_dentry.get_name();
+                *old_file.ctime.lock() = now;
+            }
+
+            let old_ino = old_file.ino;
+            let new_ino = new_file.as_ref().ok().map(|f| f.ino);
+            let old_name = old_dentry.get_name();
+            let new_name = new_dentry.get_name();
+
+            // Find the old and new entries in the directory after we've locked the directory.
+            let indices =
+                entries
+                    .iter()
+                    .enumerate()
+                    .fold([None, None], |[old, new], (idx, (name, ino))| {
+                        if Some(*ino) == new_ino && *name == new_name {
+                            [old, Some(idx)]
+                        } else if *ino == old_ino && *name == old_name {
+                            [Some(idx), new]
+                        } else {
+                            [old, new]
+                        }
+                    });
+
+            let (mut old_entry_idx, new_entry_idx) = match indices {
+                [None, ..] => return Err(ENOENT),
+                [Some(old_idx), new_idx] => (old_idx, new_idx),
+            };
+
+            let now = Instant::now();
+
+            if let Some(new_idx) = new_entry_idx {
+                // Replace existing file (i.e. rename the old and unlink the new)
+                let new_file = new_file.unwrap();
+                let _new_file_lock = Task::block_on(new_file.rwsem.write());
+
+                // SAFETY: `new_file_lock` has done the synchronization
+                if new_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 {
+                    return Err(EISDIR);
+                }
+                if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 {
+                    return Err(ENOTDIR);
+                }
+
+                entries.remove(new_idx);
+
+                // `Vec::remove` shifts every later element one slot to the
+                // left, so the old entry's index has to follow it when the
+                // removed entry was located before it. Without this the
+                // wrong entry would be renamed (or the index would be out
+                // of bounds when the old entry was the last one).
+                if new_idx < old_entry_idx {
+                    old_entry_idx -= 1;
+                }
+
+                // SAFETY: `parent_lock` has done the synchronization
+                self.size.fetch_sub(1, Ordering::Relaxed);
+
+                // The last reference to the inode is held by some dentry
+                // and will be released when the dentry is released
+
+                // SAFETY: `new_file_lock` has done the synchronization
+                new_file.nlink.fetch_sub(1, Ordering::Relaxed);
+                *new_file.ctime.lock() = now;
+            }
+
+            rename_old(&mut entries[old_entry_idx], &old_file, new_dentry, now);
+            *self.mtime.lock() = now;
+        } else {
+            // Cross-directory rename - handle similar to same directory case
+
+            // Get new parent directory
+            let new_parent_inode = new_dentry.parent().get_inode()?;
+            assert!(new_parent_inode.is_dir());
+            let new_parent = (new_parent_inode.as_ref() as &dyn Any)
+                .downcast_ref::<DirectoryInode>()
+                .expect("new parent must be a DirectoryInode");
+
+            let old_parent_lock = Task::block_on(self.rwsem.write());
+            let new_parent_lock = Task::block_on(new_parent_inode.rwsem.write());
+
+            let old_ino = old_file.ino;
+            let new_ino = new_file.as_ref().ok().map(|f| f.ino);
+            let old_name = old_dentry.get_name();
+            let new_name = new_dentry.get_name();
+
+            // Find the old entry in the old directory
+            let old_entries = self.entries.access_mut(old_parent_lock.prove_mut());
+            let old_pos = old_entries
+                .iter()
+                .position(|(name, ino)| *ino == old_ino && *name == old_name)
+                .ok_or(ENOENT)?;
+
+            // Find out whether the target entry exists in the new directory
+            let new_entries = new_parent.entries.access_mut(new_parent_lock.prove_mut());
+            let has_new = new_entries
+                .iter()
+                .any(|(name, ino)| Some(*ino) == new_ino && *name == new_name);
+
+            let now = Instant::now();
+
+            if has_new {
+                // Replace existing file (i.e. move the old and unlink the new)
+                let new_file = new_file.unwrap();
+                let new_file_lock = Task::block_on(new_file.rwsem.write());
+
+                if old_file.mode.load(Ordering::Relaxed) & S_IFDIR != 0
+                    && new_file.mode.load(Ordering::Relaxed) & S_IFDIR == 0
+                {
+                    return Err(ENOTDIR);
+                }
+
+                // Unlink the old file that was replaced
+                new_parent.do_unlink(
+                    &new_file,
+                    &new_name,
+                    new_entries,
+                    now,
+                    false,
+                    new_parent_lock.prove_mut(),
+                    new_file_lock.prove_mut(),
+                )?;
+            } else {
+                new_parent.size.fetch_add(1, Ordering::Relaxed);
+            }
+
+            // Remove from old directory
+            old_entries.remove(old_pos);
+
+            // Add new entry
+            new_entries.push((new_name, old_ino));
+
+            self.size.fetch_sub(1, Ordering::Relaxed);
+            *self.mtime.lock() = now;
+            // The new parent gained an entry, so its mtime changes as well
+            // (previously this only happened on the replace path, through
+            // `do_unlink`).
+            *new_parent.mtime.lock() = now;
+            *old_file.ctime.lock() = now;
+        }
+
+        Task::block_on(dcache::d_exchange(old_dentry, new_dentry));
+
+        Ok(())
+    }
 }
@@ -231,6 +440,9 @@ impl SymlinkInode {
 
             addr_of_mut_field!(&mut *inode, mode).write((S_IFLNK | 0o777).into());
             addr_of_mut_field!(&mut *inode, size).write((len as u64).into());
+            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
         })
     }
 }
@@ -260,6 +472,9 @@ impl FileInode {
 
             addr_of_mut_field!(&mut *inode, mode).write((S_IFREG | (mode & 0o777)).into());
             addr_of_mut_field!(&mut *inode, nlink).write(1.into());
+            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
+            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
         })
     }
 }
@@ -310,6 +525,8 @@ impl Inode for FileInode {
 
         // SAFETY: `lock` has done the synchronization
         self.size.store(pos as u64, Ordering::Relaxed);
+        *self.mtime.lock() = Instant::now();
+
         Ok(pos - offset)
     }
 
@@ -320,6 +537,8 @@ impl Inode for FileInode {
 
         // SAFETY: `lock` has done the synchronization
         self.size.store(length as u64, Ordering::Relaxed);
+        *self.mtime.lock() = Instant::now();
+
         filedata.resize(length, 0);
 
         Ok(())
@@ -333,6 +552,8 @@ impl Inode for FileInode {
         let old = self.mode.load(Ordering::Relaxed);
         self.mode
             .store((old & !0o777) | (mode & 0o777), Ordering::Relaxed);
+        *self.ctime.lock() = Instant::now();
+
         Ok(())
     }
 }
@@ -341,6 +562,7 @@ impl_any!(TmpFs);
 struct TmpFs {
     next_ino: AtomicIno,
     readonly: bool,
+    rename_lock: Mutex<()>,
 }
 
 impl Vfs for TmpFs {
@@ -366,6 +588,7 @@ impl TmpFs {
         let tmpfs = Arc::new(Self {
             next_ino: AtomicIno::new(1),
             readonly,
+            rename_lock: Mutex::new(()),
         });
 
         let weak = Arc::downgrade(&tmpfs);
@@ -383,6 +606,10 @@ impl MountCreator for TmpFsMountCreator {
 
         Mount::new(mp, fs, root_inode)
     }
+
+    // Signature check for this mount creator: the byte slice argument is
+    // ignored and `Ok(true)` is returned unconditionally.
+    fn check_signature(&self, _: &[u8]) -> KResult<bool> {
+        Ok(true)
+    }
 }
 
 pub fn init() {

+ 1 - 0
src/kernel.rs

@@ -4,6 +4,7 @@ pub mod constants;
 pub mod interrupt;
 pub mod mem;
 pub mod pcie;
+pub mod rtc;
 pub mod syscall;
 pub mod task;
 pub mod timer;

+ 21 - 13
src/kernel/block.rs

@@ -48,6 +48,21 @@ enum BlockDeviceType {
     },
 }
 
+/// File system kinds that can be named through `as_str`.
+#[derive(Debug, Clone)]
+pub enum FileSystemType {
+    Ext4,
+    Fat32,
+}
+
+impl FileSystemType {
+    /// Returns the lowercase name of the variant: `"ext4"` or `"fat32"`.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Ext4 => "ext4",
+            Self::Fat32 => "fat32",
+        }
+    }
+}
+
 pub struct BlockDevice {
     /// Unique device identifier, major and minor numbers
     devid: DevId,
@@ -114,14 +129,14 @@ impl BlockDevice {
         }
     }
 
-    pub fn register_partition(&self, idx: u32, offset: u64, size: u64) -> KResult<Arc<Self>> {
+    pub fn register_partition(&self, idx: usize, offset: u64, size: u64) -> KResult<Arc<Self>> {
         let queue = match &self.dev_type {
             BlockDeviceType::Disk { queue } => queue.clone(),
             BlockDeviceType::Partition { .. } => return Err(EINVAL),
         };
 
         let device = Arc::new(BlockDevice {
-            devid: make_device(self.devid >> 8, idx as u32),
+            devid: make_device(self.devid >> 8, (self.devid & 0xff) + idx as u32 + 1),
             sector_count: size,
             dev_type: BlockDeviceType::Partition {
                 disk_dev: self.devid,
@@ -140,17 +155,10 @@ impl BlockDevice {
         match self.dev_type {
             BlockDeviceType::Partition { .. } => Err(EINVAL),
             BlockDeviceType::Disk { .. } => {
-                let mbr_table = MBRPartTable::from_disk(self).await?;
-
-                for (
-                    idx,
-                    Partition {
-                        lba_offset,
-                        sector_count,
-                    },
-                ) in mbr_table.partitions().enumerate()
-                {
-                    self.register_partition(idx as u32 + 1, lba_offset, sector_count)?;
+                if let Ok(mbr_table) = MBRPartTable::from_disk(self).await {
+                    for (idx, partition) in mbr_table.partitions().enumerate() {
+                        self.register_partition(idx, partition.lba_offset, partition.sector_count)?;
+                    }
                 }
 
                 Ok(())

+ 5 - 4
src/kernel/chardev.rs

@@ -5,7 +5,7 @@ use super::{
     task::{ProcessList, Thread},
     terminal::Terminal,
     vfs::{
-        file::{File, TerminalFile},
+        file::{File, FileType, TerminalFile},
         DevId,
     },
 };
@@ -20,6 +20,7 @@ use alloc::{
 };
 use eonix_runtime::task::Task;
 use eonix_sync::AsProof as _;
+use posix_types::open::OpenFlags;
 
 pub trait VirtualCharDevice: Send + Sync {
     fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize>;
@@ -71,7 +72,7 @@ impl CharDevice {
         }
     }
 
-    pub fn open(self: &Arc<Self>) -> KResult<Arc<File>> {
+    pub fn open(self: &Arc<Self>, flags: OpenFlags) -> KResult<Arc<File>> {
         Ok(match &self.device {
             CharDeviceType::Terminal(terminal) => {
                 let procs = Task::block_on(ProcessList::get().read());
@@ -87,9 +88,9 @@ impl CharDevice {
                     )));
                 }
 
-                TerminalFile::new(terminal.clone())
+                TerminalFile::new(terminal.clone(), flags)
             }
-            CharDeviceType::Virtual(_) => Arc::new(File::CharDev(self.clone())),
+            CharDeviceType::Virtual(_) => File::new(flags, FileType::CharDev(self.clone())),
         })
     }
 }

+ 2 - 2
src/kernel/constants.rs

@@ -61,8 +61,8 @@ pub const SEEK_END: u32 = 2;
 pub const F_DUPFD: u32 = 0;
 pub const F_GETFD: u32 = 1;
 pub const F_SETFD: u32 = 2;
-// pub const F_GETFL: u32 = 3;
-// pub const F_SETFL: u32 = 4;
+pub const F_GETFL: u32 = 3;
+pub const F_SETFL: u32 = 4;
 pub const F_DUPFD_CLOEXEC: u32 = 1030;
 
 pub const STATX_TYPE: u32 = 1;

+ 5 - 1
src/kernel/mem/mm_list.rs

@@ -553,6 +553,10 @@ impl MMList {
             Some(pos) => pos.ceil(),
         };
 
+        if current_break > pos {
+            return current_break;
+        }
+
         let range = VRange::new(current_break, pos);
         if !inner.check_overlapping_range(range) {
             return current_break;
@@ -753,7 +757,7 @@ where
             .as_page_attr()
             .expect("Not a page attribute");
 
-        if !from_attr.contains(PageAttribute::PRESENT) {
+        if !from_attr.intersects(PageAttribute::PRESENT | PageAttribute::MAPPED) {
             return;
         }
 

+ 3 - 3
src/kernel/mem/mm_list/page_fault.rs

@@ -35,15 +35,15 @@ impl FixEntry {
 
     fn entries() -> &'static [FixEntry] {
         extern "C" {
-            static FIX_START: *const FixEntry;
-            static FIX_END: *const FixEntry;
+            fn FIX_START();
+            fn FIX_END();
         }
 
         unsafe {
             // SAFETY: `FIX_START` and `FIX_END` are defined in the
             //         linker script in `.rodata` section.
             core::slice::from_raw_parts(
-                FIX_START,
+                FIX_START as usize as *const FixEntry,
                 (FIX_END as usize - FIX_START as usize) / size_of::<FixEntry>(),
             )
         }

+ 33 - 0
src/kernel/rtc/mod.rs

@@ -0,0 +1,33 @@
+use core::time::Duration;
+
+use super::timer::{Instant, Ticks};
+use alloc::sync::Arc;
+use eonix_log::println_warn;
+use eonix_sync::Spin;
+
+/// The registered real-time clock; `None` until `register_rtc` stores one.
+static RTC: Spin<Option<Arc<dyn RealTimeClock>>> = Spin::new(None);
+
+/// A clock source that can report the current time as an `Instant`.
+pub trait RealTimeClock: Send + Sync {
+    /// Returns the current time according to this clock.
+    fn now(&self) -> Instant;
+}
+
+impl Instant {
+    /// Returns the current time.
+    ///
+    /// Delegates to the registered RTC when there is one. Otherwise a
+    /// pseudo time is produced: a fixed base of `(55 * 365 + 30)` days plus
+    /// the time elapsed since boot.
+    pub fn now() -> Instant {
+        match RTC.lock().as_ref() {
+            Some(rtc) => rtc.now(),
+            None => {
+                // No RTC registered: fall back to the fixed base + uptime.
+                let base = Duration::from_secs((55 * 365 + 30) * 24 * 3600);
+                let pseudo_now = base + Ticks::since_boot();
+
+                Instant::new(pseudo_now.as_secs(), pseudo_now.subsec_nanos())
+            }
+        }
+    }
+}
+
+/// Installs `rtc` as the system real-time clock.
+///
+/// Only the first registration takes effect; a later call logs a warning
+/// and leaves the existing clock in place.
+pub fn register_rtc(rtc: impl RealTimeClock + 'static) {
+    let mut slot = RTC.lock();
+
+    if slot.is_none() {
+        *slot = Some(Arc::new(rtc));
+    } else {
+        println_warn!("RTC is already registered, ignoring new registration");
+    }
+}

+ 8 - 0
src/kernel/syscall.rs

@@ -68,6 +68,14 @@ impl SyscallRetVal for SyscallNoReturn {
     }
 }
 
+// Lets syscall handlers return a `u64` directly. Only compiled when the
+// target is not x86_64; the value is converted with `as usize`.
+#[cfg(not(target_arch = "x86_64"))]
+impl SyscallRetVal for u64 {
+    fn into_retval(self) -> Option<usize> {
+        Some(self as usize)
+    }
+}
+
+#[cfg(not(target_arch = "x86_64"))]
 impl FromSyscallArg for u64 {
     fn from_arg(value: usize) -> u64 {
         value as u64

+ 151 - 18
src/kernel/syscall/file_rw.rs

@@ -23,9 +23,11 @@ use crate::{
 use alloc::sync::Arc;
 use eonix_runtime::task::Task;
 use posix_types::ctypes::{Long, PtrT};
+use posix_types::namei::RenameFlags;
 use posix_types::open::{AtFlags, OpenFlags};
 use posix_types::signal::SigSet;
-use posix_types::stat::{Stat, StatX, TimeSpec};
+use posix_types::stat::Stat;
+use posix_types::stat::{StatX, TimeSpec};
 use posix_types::syscall_no::*;
 
 impl FromSyscallArg for OpenFlags {
@@ -146,8 +148,11 @@ fn getdents64(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult<usize> {
     Ok(buffer.wrote())
 }
 
-#[cfg(not(target_arch = "x86_64"))]
-#[eonix_macros::define_syscall(SYS_NEWFSTATAT)]
+#[cfg_attr(
+    not(target_arch = "x86_64"),
+    eonix_macros::define_syscall(SYS_NEWFSTATAT)
+)]
+#[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTATAT64))]
 fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags) -> KResult<()> {
     let dentry = if flags.at_empty_path() {
         let file = thread.files.get(dirfd).ok_or(EBADF)?;
@@ -166,6 +171,21 @@ fn newfstatat(dirfd: FD, pathname: *const u8, statbuf: *mut Stat, flags: AtFlags
     Ok(())
 }
 
+// fstat-style syscall: registered as `SYS_NEWFSTAT` on non-x86_64 targets
+// and as `SYS_FSTAT64` on x86_64.
+//
+// Implemented by forwarding to `sys_newfstatat` with a null pathname and
+// `AtFlags::AT_EMPTY_PATH`, so the file referred to by `fd` is the target.
+#[cfg_attr(
+    not(target_arch = "x86_64"),
+    eonix_macros::define_syscall(SYS_NEWFSTAT)
+)]
+#[cfg_attr(target_arch = "x86_64", eonix_macros::define_syscall(SYS_FSTAT64))]
+fn newfstat(fd: FD, statbuf: *mut Stat) -> KResult<()> {
+    sys_newfstatat(
+        thread,
+        fd,
+        core::ptr::null(),
+        statbuf,
+        AtFlags::AT_EMPTY_PATH,
+    )
+}
+
 #[eonix_macros::define_syscall(SYS_STATX)]
 fn statx(
     dirfd: FD,
@@ -275,20 +295,30 @@ fn readlink(pathname: *const u8, buffer: *mut u8, bufsize: usize) -> KResult<usi
     sys_readlinkat(thread, FD::AT_FDCWD, pathname, buffer, bufsize)
 }
 
-#[cfg(target_arch = "x86_64")]
-#[eonix_macros::define_syscall(SYS_LLSEEK)]
-fn llseek(fd: FD, offset_high: u32, offset_low: u32, result: *mut u64, whence: u32) -> KResult<()> {
-    let mut result = UserBuffer::new(result as *mut u8, core::mem::size_of::<u64>())?;
+fn do_lseek(thread: &Thread, fd: FD, offset: u64, whence: u32) -> KResult<u64> {
     let file = thread.files.get(fd).ok_or(EBADF)?;
 
-    let offset = ((offset_high as u64) << 32) | offset_low as u64;
-
-    let new_offset = match whence {
+    Ok(match whence {
         SEEK_SET => file.seek(SeekOption::Set(offset as usize))?,
         SEEK_CUR => file.seek(SeekOption::Current(offset as isize))?,
         SEEK_END => file.seek(SeekOption::End(offset as isize))?,
         _ => return Err(EINVAL),
-    } as u64;
+    } as u64)
+}
+
+// `SYS_LSEEK` handler, compiled only for non-x86_64 targets. The offset
+// arrives as a single `u64`; all the work is done by `do_lseek`, whose
+// result (the new offset) is returned directly.
+#[cfg(not(target_arch = "x86_64"))]
+#[eonix_macros::define_syscall(SYS_LSEEK)]
+fn lseek(fd: FD, offset: u64, whence: u32) -> KResult<u64> {
+    do_lseek(thread, fd, offset, whence)
+}
+
+// `SYS_LLSEEK` handler, compiled only for x86_64. The 64-bit offset is
+// passed as two 32-bit halves and the new offset is written back through
+// the user pointer `result` instead of being returned.
+#[cfg(target_arch = "x86_64")]
+#[eonix_macros::define_syscall(SYS_LLSEEK)]
+fn llseek(fd: FD, offset_high: u32, offset_low: u32, result: *mut u64, whence: u32) -> KResult<()> {
+    let mut result = UserBuffer::new(result as *mut u8, core::mem::size_of::<u64>())?;
+    // Reassemble the offset: `offset_high` supplies the upper 32 bits.
+    let offset = ((offset_high as u64) << 32) | (offset_low as u64);
+
+    let new_offset = do_lseek(thread, fd, offset, whence)?;
 
     result.copy(&new_offset)?.ok_or(EFAULT)
 }
@@ -370,13 +400,14 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult<usize> {
     Ok(tot)
 }
 
-#[cfg(target_arch = "x86_64")]
-#[eonix_macros::define_syscall(SYS_ACCESS)]
-fn access(pathname: *const u8, _mode: u32) -> KResult<()> {
-    let path = UserString::new(pathname)?;
-    let path = Path::new(path.as_cstr().to_bytes())?;
-
-    let dentry = Dentry::open(&thread.fs_context, path, true)?;
+#[eonix_macros::define_syscall(SYS_FACCESSAT)]
+fn faccessat(dirfd: FD, pathname: *const u8, _mode: u32, flags: AtFlags) -> KResult<()> {
+    let dentry = if flags.at_empty_path() {
+        let file = thread.files.get(dirfd).ok_or(EBADF)?;
+        file.as_path().ok_or(EBADF)?.clone()
+    } else {
+        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+    };
 
     if !dentry.is_valid() {
         return Err(ENOENT);
@@ -390,9 +421,16 @@ fn access(pathname: *const u8, _mode: u32) -> KResult<()> {
     //     X_OK => todo!(),
     //     _ => Err(EINVAL),
     // }
+
     Ok(())
 }
 
+// `SYS_ACCESS` handler, compiled only for x86_64. Forwards to
+// `sys_faccessat` with `FD::AT_FDCWD` as the directory and no flags.
+#[cfg(target_arch = "x86_64")]
+#[eonix_macros::define_syscall(SYS_ACCESS)]
+fn access(pathname: *const u8, mode: u32) -> KResult<()> {
+    sys_faccessat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty())
+}
+
 #[eonix_macros::define_syscall(SYS_SENDFILE64)]
 fn sendfile64(out_fd: FD, in_fd: FD, offset: *mut u8, count: usize) -> KResult<usize> {
     let in_file = thread.files.get(in_fd).ok_or(EBADF)?;
@@ -463,4 +501,99 @@ fn poll(fds: *mut UserPollFd, nfds: u32, timeout: u32) -> KResult<u32> {
     do_poll(thread, fds, nfds, timeout)
 }
 
+// `SYS_FCHOWNAT` handler: changes the owner (`uid`) and group (`gid`) of
+// the file selected by `dirfd` / `pathname`.
+//
+// With `AT_EMPTY_PATH` the file referred to by `dirfd` itself is the
+// target (`EBADF` if `dirfd` is not open or has no path), matching the
+// other `*at` handlers in this file; otherwise the path is resolved,
+// following a final symlink unless `no_follow` is set.
+// Returns `ENOENT` when the resolved dentry is not valid.
+#[eonix_macros::define_syscall(SYS_FCHOWNAT)]
+fn fchownat(dirfd: FD, pathname: *const u8, uid: u32, gid: u32, flags: AtFlags) -> KResult<()> {
+    let dentry = if flags.at_empty_path() {
+        let file = thread.files.get(dirfd).ok_or(EBADF)?;
+        file.as_path().ok_or(EBADF)?.clone()
+    } else {
+        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+    };
+
+    if !dentry.is_valid() {
+        return Err(ENOENT);
+    }
+
+    dentry.chown(uid, gid)
+}
+
+// `SYS_FCHMODAT` handler: applies `mode` to the file selected by `dirfd`
+// and `pathname`. With `AT_EMPTY_PATH` the file behind `dirfd` is used
+// (`EBADF` if it is missing or has no path); otherwise the path is
+// resolved. An invalid dentry yields `ENOENT`.
+#[eonix_macros::define_syscall(SYS_FCHMODAT)]
+fn fchmodat(dirfd: FD, pathname: *const u8, mode: u32, flags: AtFlags) -> KResult<()> {
+    let dentry = match flags.at_empty_path() {
+        true => {
+            let opened = thread.files.get(dirfd).ok_or(EBADF)?;
+            opened.as_path().ok_or(EBADF)?.clone()
+        }
+        false => dentry_from(thread, dirfd, pathname, !flags.no_follow())?,
+    };
+
+    if dentry.is_valid() {
+        dentry.chmod(mode)
+    } else {
+        Err(ENOENT)
+    }
+}
+
+// Path-based chmod: forwards to `sys_fchmodat` with `FD::AT_FDCWD` and no
+// flags.
+// NOTE(review): this handler takes a pathname but is registered under
+// `SYS_FCHMOD`; on Linux `fchmod` takes a file descriptor as its first
+// argument -- confirm the syscall number / argument type is intended.
+#[eonix_macros::define_syscall(SYS_FCHMOD)]
+fn chmod(pathname: *const u8, mode: u32) -> KResult<()> {
+    sys_fchmodat(thread, FD::AT_FDCWD, pathname, mode, AtFlags::empty())
+}
+
+// `SYS_UTIMENSAT` handler. Currently a stub: it resolves the target and
+// reads the requested timestamps from user memory, but does not apply
+// them -- `Ok(())` is returned without changing anything on the inode.
+//
+// Errors: `EBADF` when `AT_EMPTY_PATH` is set and `dirfd` is not open or
+// has no path; `ENOENT` when the resolved dentry is not valid; whatever
+// `UserPointer` reports when `times` cannot be read.
+#[eonix_macros::define_syscall(SYS_UTIMENSAT)]
+fn utimensat(
+    dirfd: FD,
+    pathname: *const u8,
+    times: *const TimeSpec,
+    flags: AtFlags,
+) -> KResult<()> {
+    let dentry = if flags.at_empty_path() {
+        let file = thread.files.get(dirfd).ok_or(EBADF)?;
+        file.as_path().ok_or(EBADF)?.clone()
+    } else {
+        dentry_from(thread, dirfd, pathname, !flags.no_follow())?
+    };
+
+    if !dentry.is_valid() {
+        return Err(ENOENT);
+    }
+
+    // A null `times` yields two default `TimeSpec` placeholders; otherwise
+    // two consecutive `TimeSpec` values are read from user memory.
+    // The result is unused for now (hence the leading underscore).
+    let _times = if times.is_null() {
+        [TimeSpec::default(), TimeSpec::default()]
+    } else {
+        let times = UserPointer::new(times)?;
+        [times.read()?, times.offset(1)?.read()?]
+    };
+
+    // TODO: Implement utimensat
+    // dentry.utimens(&times)
+    Ok(())
+}
+
+// `SYS_RENAMEAT2` handler: validates `flags`, resolves both paths without
+// following a final symlink, and delegates to `Dentry::rename`.
+//
+// `EINVAL` is returned for unknown flag bits and when both
+// `RENAME_NOREPLACE` and `RENAME_EXCHANGE` are requested.
+#[eonix_macros::define_syscall(SYS_RENAMEAT2)]
+fn renameat2(
+    old_dirfd: FD,
+    old_pathname: *const u8,
+    new_dirfd: FD,
+    new_pathname: *const u8,
+    flags: u32,
+) -> KResult<()> {
+    let flags = RenameFlags::from_bits(flags).ok_or(EINVAL)?;
+
+    // The two flags RENAME_NOREPLACE and RENAME_EXCHANGE are mutually exclusive.
+    let exclusive_pair = RenameFlags::RENAME_NOREPLACE | RenameFlags::RENAME_EXCHANGE;
+    if flags.contains(exclusive_pair) {
+        return Err(EINVAL);
+    }
+
+    let source = dentry_from(thread, old_dirfd, old_pathname, false)?;
+    let target = dentry_from(thread, new_dirfd, new_pathname, false)?;
+
+    source.rename(&target, flags)
+}
+
+// `SYS_RENAME` handler, compiled only for x86_64. Forwards to
+// `sys_renameat2` with both paths relative to `FD::AT_FDCWD` and no flags.
+#[cfg(target_arch = "x86_64")]
+#[eonix_macros::define_syscall(SYS_RENAME)]
+fn rename(old_pathname: *const u8, new_pathname: *const u8) -> KResult<()> {
+    sys_renameat2(
+        thread,
+        FD::AT_FDCWD,
+        old_pathname,
+        FD::AT_FDCWD,
+        new_pathname,
+        0,
+    )
+}
+
 pub fn keep_alive() {}

+ 36 - 40
src/kernel/syscall/procops.rs

@@ -1,6 +1,3 @@
-use core::time::Duration;
-
-use super::sysinfo::TimeVal;
 use super::SyscallNoReturn;
 use crate::io::Buffer;
 use crate::kernel::constants::{EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH};
@@ -10,7 +7,7 @@ use crate::kernel::constants::{
 use crate::kernel::mem::PageBuffer;
 use crate::kernel::task::{
     do_clone, futex_wait, futex_wake, FutexFlags, FutexOp, ProcessList, ProgramLoader,
-    SignalAction, Thread, WaitType,
+    RobustListHead, SignalAction, Thread, WaitType,
 };
 use crate::kernel::task::{parse_futexop, CloneArgs};
 use crate::kernel::timer::sleep;
@@ -23,14 +20,16 @@ use alloc::borrow::ToOwned;
 use alloc::ffi::CString;
 use bitflags::bitflags;
 use core::ptr::NonNull;
+use core::time::Duration;
 use eonix_hal::processor::UserTLS;
 use eonix_hal::traits::trap::RawTrapContext;
-use eonix_mm::address::Addr as _;
+use eonix_mm::address::{Addr as _, VAddr};
 use eonix_runtime::task::Task;
 use eonix_sync::AsProof as _;
 use posix_types::constants::{P_ALL, P_PID};
 use posix_types::ctypes::PtrT;
 use posix_types::signal::{SigAction, SigInfo, SigSet, Signal};
+use posix_types::stat::TimeVal;
 use posix_types::{syscall_no::*, SIGNAL_NOW};
 
 #[repr(C)]
@@ -81,11 +80,8 @@ fn getcwd(buffer: *mut u8, bufsize: usize) -> KResult<usize> {
     let mut user_buffer = UserBuffer::new(buffer, bufsize)?;
     let mut buffer = PageBuffer::new();
 
-    thread
-        .fs_context
-        .cwd
-        .lock()
-        .get_path(&thread.fs_context, &mut buffer)?;
+    let cwd = thread.fs_context.cwd.lock().clone();
+    cwd.get_path(&thread.fs_context, &mut buffer)?;
 
     user_buffer.fill(buffer.data())?.ok_or(ERANGE)?;
 
@@ -170,24 +166,27 @@ fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult<Sysc
 
     // TODO: When `execve` is called by one of the threads in a process, the other threads
     //       should be terminated and `execve` is performed in the thread group leader.
-    if let Ok(load_info) = ProgramLoader::parse(dentry.clone())?.load(argv, envp) {
-        unsafe {
-            // SAFETY: We are doing execve, all other threads are terminated.
-            thread.process.mm_list.replace(Some(load_info.mm_list));
-        }
-        thread.files.on_exec();
-        thread.signal_list.clear_non_ignore();
-        thread.set_name(dentry.name().clone());
-
-        let mut trap_ctx = thread.trap_ctx.borrow();
-        trap_ctx.set_program_counter(load_info.entry_ip.addr());
-        trap_ctx.set_stack_pointer(load_info.sp.addr());
-        Ok(SyscallNoReturn)
-    } else {
-        // We can't hold any ownership when we call `kill_current`.
-        // ProcessList::kill_current(Signal::SIGSEGV);
-        todo!()
+    let load_info = ProgramLoader::parse(dentry.clone())?.load(argv, envp)?;
+
+    if let Some(robust_list) = thread.get_robust_list() {
+        let _ = Task::block_on(robust_list.wake_all());
+        thread.set_robust_list(None);
     }
+
+    unsafe {
+        // SAFETY: We are doing execve, all other threads are terminated.
+        thread.process.mm_list.replace(Some(load_info.mm_list));
+    }
+
+    thread.files.on_exec();
+    thread.signal_list.clear_non_ignore();
+    thread.set_name(dentry.get_name());
+
+    let mut trap_ctx = thread.trap_ctx.borrow();
+    trap_ctx.set_program_counter(load_info.entry_ip.addr());
+    trap_ctx.set_stack_pointer(load_info.sp.addr());
+
+    Ok(SyscallNoReturn)
 }
 
 #[eonix_macros::define_syscall(SYS_EXIT)]
@@ -684,19 +683,6 @@ fn getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> {
     Ok(())
 }
 
-#[eonix_macros::define_syscall(SYS_FCHMOD)]
-fn chmod(pathname: *const u8, mode: u32) -> KResult<()> {
-    let path = UserString::new(pathname)?;
-    let path = Path::new(path.as_cstr().to_bytes())?;
-
-    let dentry = Dentry::open(&thread.fs_context, path, true)?;
-    if !dentry.is_valid() {
-        return Err(ENOENT);
-    }
-
-    dentry.chmod(mode)
-}
-
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_VFORK)]
 fn vfork() -> KResult<u32> {
@@ -760,6 +746,16 @@ fn futex(
     }
 }
 
+// set_robust_list(2): store the user-space address of the robust futex list
+// head on the calling thread.
+#[eonix_macros::define_syscall(SYS_SET_ROBUST_LIST)]
+fn set_robust_list(head: usize, len: usize) -> KResult<()> {
+    // The caller must pass exactly the size of `RobustListHead`; any other
+    // length is rejected with `EINVAL`.
+    let expected_len = size_of::<RobustListHead>();
+
+    if len == expected_len {
+        let head_addr = VAddr::from(head);
+        thread.set_robust_list(Some(head_addr));
+        Ok(())
+    } else {
+        Err(EINVAL)
+    }
+}
+
 #[eonix_macros::define_syscall(SYS_RT_SIGRETURN)]
 fn rt_sigreturn() -> KResult<SyscallNoReturn> {
     thread

+ 24 - 25
src/kernel/syscall/sysinfo.rs

@@ -2,12 +2,15 @@ use crate::{
     kernel::{
         constants::{CLOCK_MONOTONIC, CLOCK_REALTIME, EINVAL},
         task::Thread,
-        timer::ticks,
+        timer::{Instant, Ticks},
         user::UserPointerMut,
     },
     prelude::*,
 };
-use posix_types::syscall_no::*;
+use posix_types::{
+    stat::{TimeSpec, TimeVal},
+    syscall_no::*,
+};
 
 #[derive(Clone, Copy)]
 struct NewUTSName {
@@ -40,26 +43,18 @@ fn newuname(buffer: *mut NewUTSName) -> KResult<()> {
     // Linux compatible
     copy_cstr_to_array(b"Linux", &mut uname.sysname);
     copy_cstr_to_array(b"(none)", &mut uname.nodename);
-    copy_cstr_to_array(b"1.0.0", &mut uname.release);
-    copy_cstr_to_array(b"1.0.0", &mut uname.version);
+    copy_cstr_to_array(b"5.17.1", &mut uname.release);
+    copy_cstr_to_array(b"eonix 1.1.4", &mut uname.version);
+
+    #[cfg(target_arch = "x86_64")]
     copy_cstr_to_array(b"x86", &mut uname.machine);
-    copy_cstr_to_array(b"(none)", &mut uname.domainname);
 
-    buffer.write(uname)
-}
+    #[cfg(target_arch = "riscv64")]
+    copy_cstr_to_array(b"riscv64", &mut uname.machine);
 
-#[allow(dead_code)]
-#[derive(Default, Clone, Copy)]
-pub struct TimeVal {
-    sec: u64,
-    usec: u64,
-}
+    copy_cstr_to_array(b"(none)", &mut uname.domainname);
 
-#[allow(dead_code)]
-#[derive(Clone, Copy)]
-pub struct TimeSpec {
-    sec: u64,
-    nsec: u64,
+    buffer.write(uname)
 }
 
 #[eonix_macros::define_syscall(SYS_GETTIMEOFDAY)]
@@ -70,10 +65,12 @@ fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> {
 
     if !timeval.is_null() {
         let timeval = UserPointerMut::new(timeval)?;
-        let ticks = ticks();
+        let now = Instant::now();
+        let since_epoch = now.since_epoch();
+
         timeval.write(TimeVal {
-            sec: ticks.in_secs() as u64,
-            usec: ticks.in_usecs() as u64 % 1_000_000,
+            tv_sec: since_epoch.as_secs(),
+            tv_usec: since_epoch.subsec_micros(),
         })?;
     }
 
@@ -86,10 +83,12 @@ fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec)
     }
 
     let timespec = UserPointerMut::new(timespec)?;
-    let ticks = ticks();
+    let now = Instant::now();
+    let since_epoch = now.since_epoch();
+
     timespec.write(TimeSpec {
-        sec: ticks.in_secs() as u64,
-        nsec: ticks.in_nsecs() as u64 % 1_000_000_000,
+        tv_sec: since_epoch.as_secs(),
+        tv_nsec: since_epoch.subsec_nanos(),
     })
 }
 
@@ -127,7 +126,7 @@ struct Sysinfo {
 fn sysinfo(info: *mut Sysinfo) -> KResult<()> {
     let info = UserPointerMut::new(info)?;
     info.write(Sysinfo {
-        uptime: ticks().in_secs() as u32,
+        uptime: Ticks::since_boot().as_secs() as u32,
         loads: [0; 3],
         totalram: 100,
         freeram: 50,

+ 1 - 1
src/kernel/task.rs

@@ -10,7 +10,7 @@ mod signal;
 mod thread;
 
 pub use clone::{do_clone, CloneArgs, CloneFlags};
-pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp};
+pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead};
 pub use kernel_stack::KernelStack;
 pub use loader::ProgramLoader;
 pub use process::{alloc_pid, Process, ProcessBuilder, WaitObject, WaitType};

+ 61 - 0
src/kernel/task/futex.rs

@@ -256,3 +256,64 @@ async fn futex_requeue(
 
     todo!()
 }
+
+// The purpose of the robust futex list is to ensure that if a thread
+// accidentally fails to unlock a futex before terminating or calling
+// execve(2), another thread that is waiting on that futex is
+// notified that the former owner of the futex has died.  This
+// notification consists of two pieces: the FUTEX_OWNER_DIED bit is
+// set in the futex word, and the kernel performs a futex(2)
+// FUTEX_WAKE operation on one of the threads waiting on the futex.
+// https://man7.org/linux/man-pages/man2/get_robust_list.2.html
+
+/// One link of the robust futex list: just the address of the next entry.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+struct RobustList {
+    next: usize, // Pointer to the next RobustList entry
+}
+
+/// Head of a robust futex list, as read from user memory.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct RobustListHead {
+    /// First link of the list; `next == 0` is treated as "no list".
+    robust_list: RobustList,
+    /// Signed offset added to an entry address to obtain its futex address.
+    futex_offset: isize,
+    /// Entry that is woken after the walk when non-zero.
+    list_op_pending: usize,
+}
+
+impl RobustListHead {
+    /// Upper bound on the number of entries visited by `wake_all`.
+    ///
+    /// The list lives in user memory, so it may be corrupted or deliberately
+    /// cyclic. Without a bound, a cycle that never returns to the first entry
+    /// would keep the kernel looping forever. Linux applies the same cap
+    /// (`ROBUST_LIST_LIMIT`).
+    const ROBUST_LIST_LIMIT: usize = 2048;
+
+    /// Compute the futex address that belongs to the list entry at `entry_ptr`.
+    ///
+    /// Both operands come from user space, so the addition wraps instead of
+    /// panicking on overflow; a bogus result is simply an address handed to
+    /// `futex_wake`.
+    fn futex_addr(&self, entry_ptr: usize) -> usize {
+        self.futex_offset.wrapping_add(entry_ptr as isize) as usize
+    }
+
+    /// Wake the waiters of every futex reachable from this list head.
+    ///
+    /// Starts at `robust_list.next` and follows the `next` pointers read from
+    /// user memory until the walk returns to the first entry, reaches a null
+    /// pointer, or has visited `ROBUST_LIST_LIMIT` entries. Afterwards the
+    /// futex of `list_op_pending` is woken if that field is non-zero.
+    ///
+    /// Returns early with `Ok(())` when the list is empty (`next == 0`), and
+    /// propagates the first error from `futex_wake` or from reading an entry.
+    pub async fn wake_all(&self) -> KResult<()> {
+        let end_ptr = self.robust_list.next;
+        let mut entry_ptr = end_ptr;
+
+        if entry_ptr == 0 {
+            return Ok(());
+        }
+
+        // Bounded walk: see `ROBUST_LIST_LIMIT`.
+        for _ in 0..Self::ROBUST_LIST_LIMIT {
+            // Wake up the futex at the entry_ptr address.
+            let futex_addr = self.futex_addr(entry_ptr);
+            futex_wake(futex_addr, None, usize::MAX as u32).await?;
+
+            // Move to the next entry in the robust list.
+            let robust_list = UserPointer::new(entry_ptr as *const RobustList)?.read()?;
+
+            entry_ptr = robust_list.next;
+
+            if entry_ptr == end_ptr || entry_ptr == 0 {
+                break;
+            }
+        }
+
+        if self.list_op_pending != 0 {
+            // If there is a pending operation, we need to wake it up.
+            let pending_futex_addr = self.futex_addr(self.list_op_pending);
+            futex_wake(pending_futex_addr, None, usize::MAX as u32).await?;
+        }
+
+        Ok(())
+    }
+}

+ 6 - 3
src/kernel/task/loader/elf.rs

@@ -297,6 +297,10 @@ impl<E: ElfArch> Elf<E> {
             self.entry_point()
         };
         aux_vec.set(AuxKey::AT_ENTRY, E::Ea::from_usize(elf_entry))?;
+        aux_vec.set(
+            AuxKey::AT_RANDOM,
+            E::Ea::from_usize(E::STACK_BASE_ADDR - 16),
+        )?;
 
         if let Some(ldso_base) = ldso_base {
             aux_vec.set(AuxKey::AT_BASE, E::Ea::from_usize(ldso_base.addr()))?;
@@ -377,9 +381,8 @@ impl<E: ElfArch> Elf<E> {
 
         if let Some(ldso_path) = ldso_path {
             let fs_context = FsContext::global();
-            let ldso_file =
-                Dentry::open(fs_context, Path::new(ldso_path.as_bytes()).unwrap(), true).unwrap();
-            let ldso_elf = Elf::<E>::parse(ldso_file).unwrap();
+            let ldso_file = Dentry::open(fs_context, Path::new(ldso_path.as_bytes())?, true)?;
+            let ldso_elf = Elf::<E>::parse(ldso_file)?;
 
             let base = VAddr::from(E::LDSO_BASE_ADDR);
 

+ 7 - 6
src/kernel/task/process_list.rs

@@ -135,14 +135,15 @@ impl ProcessList {
         }
 
         if let Some(clear_ctid) = thread.get_clear_ctid() {
-            UserPointerMut::new(clear_ctid as *mut u32)
+            let _ = UserPointerMut::new(clear_ctid as *mut u32)
                 .unwrap()
-                .write(0u32)
-                .expect("should clear child tid successfully");
+                .write(0u32);
 
-            futex_wake(clear_ctid, None, 1)
-                .await
-                .expect("should wake up child tid");
+            let _ = futex_wake(clear_ctid, None, 1).await;
+        }
+
+        if let Some(robust_list) = thread.get_robust_list() {
+            let _ = robust_list.wake_all().await;
         }
 
         // main thread exit

+ 16 - 2
src/kernel/task/thread.rs

@@ -6,9 +6,9 @@ use crate::{
     kernel::{
         interrupt::default_irq_handler,
         syscall::{syscall_handlers, SyscallHandler},
-        task::{clone::CloneArgs, CloneFlags},
+        task::{clone::CloneArgs, futex::RobustListHead, CloneFlags},
         timer::{should_reschedule, timer_interrupt},
-        user::UserPointerMut,
+        user::{UserPointer, UserPointerMut},
         vfs::{filearray::FileArray, FsContext},
     },
     prelude::*,
@@ -74,6 +74,8 @@ struct ThreadInner {
     set_child_tid: Option<usize>,
 
     clear_child_tid: Option<usize>,
+
+    robust_list_address: Option<VAddr>,
 }
 
 pub struct Thread {
@@ -251,6 +253,7 @@ impl ThreadBuilder {
                 tls: self.tls,
                 set_child_tid: self.set_child_tid,
                 clear_child_tid: self.clear_child_tid,
+                robust_list_address: None,
             }),
         });
 
@@ -288,6 +291,17 @@ impl Thread {
         Ok(())
     }
 
+    /// Replace the stored robust list head address (`None` clears it).
+    pub fn set_robust_list(&self, robust_list_address: Option<VAddr>) {
+        let mut inner = self.inner.lock();
+        inner.robust_list_address = robust_list_address;
+    }
+
+    /// Read the robust list head from the stored user-space address.
+    ///
+    /// Returns `None` when no address is stored, when the user pointer cannot
+    /// be constructed, or when reading through it fails.
+    pub fn get_robust_list(&self) -> Option<RobustListHead> {
+        // The lock guard is a temporary of this statement, so it is released
+        // before user memory is touched below.
+        let head_addr = self.inner.lock().robust_list_address?;
+        let head_ptr = head_addr.addr() as *const RobustListHead;
+
+        match UserPointer::new(head_ptr) {
+            Ok(user_head) => user_head.read().ok(),
+            Err(_) => None,
+        }
+    }
+
     pub fn set_name(&self, name: Arc<[u8]>) {
         self.inner.lock().name = name;
     }

+ 66 - 17
src/kernel/timer.rs

@@ -9,6 +9,7 @@ use core::{
 };
 use eonix_hal::processor::CPU;
 use eonix_sync::{Spin, SpinIrq as _};
+use posix_types::stat::{StatXTimestamp, TimeSpec, TimeVal};
 
 static TICKS: AtomicUsize = AtomicUsize::new(0);
 static WAKEUP_TICK: AtomicUsize = AtomicUsize::new(usize::MAX);
@@ -17,7 +18,11 @@ static SLEEPERS_LIST: Spin<BinaryHeap<Reverse<Sleepers>>> = Spin::new(BinaryHeap
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
 pub struct Ticks(usize);
 
-pub struct Instant(Ticks);
+#[derive(Default, Clone, Copy)]
+pub struct Instant {
+    secs_since_epoch: u64,
+    nsecs_within: u32,
+}
 
 struct Sleepers {
     wakeup_tick: Ticks,
@@ -71,24 +76,65 @@ impl Ticks {
 }
 
 impl Instant {
-    pub fn now() -> Self {
-        Instant(Ticks::now())
+    pub const fn default() -> Self {
+        Instant {
+            secs_since_epoch: 0,
+            nsecs_within: 0,
+        }
+    }
+
+    pub fn new(secs_since_epoch: u64, nsecs_within: u32) -> Self {
+        Instant {
+            secs_since_epoch,
+            nsecs_within,
+        }
     }
 
     pub fn elapsed(&self) -> Duration {
-        Duration::from_nanos((Ticks::now().in_nsecs() - self.0.in_nsecs()) as u64)
+        let now = Instant::now();
+        if now.nsecs_within < self.nsecs_within {
+            // We have wrapped around the nanoseconds.
+            Duration::new(
+                now.secs_since_epoch - self.secs_since_epoch - 1,
+                1_000_000_000 + now.nsecs_within - self.nsecs_within,
+            )
+        } else {
+            Duration::new(
+                now.secs_since_epoch - self.secs_since_epoch,
+                now.nsecs_within - self.nsecs_within,
+            )
+        }
+    }
+
+    pub fn since_epoch(&self) -> Duration {
+        Duration::new(self.secs_since_epoch, self.nsecs_within)
     }
 }
 
-impl From<Ticks> for Instant {
-    fn from(ticks: Ticks) -> Self {
-        Instant(ticks)
+impl From<Instant> for TimeSpec {
+    fn from(value: Instant) -> Self {
+        Self {
+            tv_sec: value.secs_since_epoch,
+            tv_nsec: value.nsecs_within,
+        }
     }
 }
 
-impl From<Instant> for Ticks {
-    fn from(instant: Instant) -> Self {
-        instant.0
+impl From<Instant> for TimeVal {
+    fn from(value: Instant) -> Self {
+        Self {
+            tv_sec: value.secs_since_epoch,
+            tv_usec: value.nsecs_within / 1_000,
+        }
+    }
+}
+
+impl From<Instant> for StatXTimestamp {
+    fn from(value: Instant) -> Self {
+        Self {
+            tv_sec: value.secs_since_epoch,
+            tv_nsec: value.nsecs_within,
+        }
     }
 }
 
@@ -104,7 +150,15 @@ impl Add<Duration> for Instant {
     type Output = Instant;
 
     fn add(self, duration: Duration) -> Self::Output {
-        Instant(self.0 + Ticks(duration.as_millis() as usize))
+        let nsecs = self.nsecs_within + duration.subsec_nanos();
+        let nsecs_within = nsecs % 1_000_000_000;
+        let secs_since_epoch =
+            self.secs_since_epoch + duration.as_secs() + (nsecs / 1_000_000_000) as u64;
+
+        Instant {
+            secs_since_epoch,
+            nsecs_within: nsecs_within,
+        }
     }
 }
 
@@ -156,13 +210,8 @@ pub fn should_reschedule() -> bool {
     }
 }
 
-pub fn ticks() -> Ticks {
-    Ticks::now()
-}
-
 pub async fn sleep(duration: Duration) {
-    let wakeup_time = Instant::now() + duration;
-    let wakeup_tick = Ticks::from(wakeup_time);
+    let wakeup_tick = Ticks::now() + Ticks(duration.as_millis() as usize);
 
     core::future::poll_fn(|ctx| {
         if Ticks::now() >= wakeup_tick {

+ 16 - 20
src/kernel/user/dataflow.rs

@@ -142,11 +142,10 @@ impl CheckedUserPointer<'_> {
                 "3:",
                 "nop",
                 ".pushsection .fix, \"a\", @progbits",
-                ".align 16",
-                ".quad 2b",      // instruction address
-                ".quad 3b - 2b", // instruction length
-                ".quad 3b",      // fix jump address
-                ".quad 0x3",     // type: load
+                ".8byte 2b",      // instruction address
+                ".8byte 3b - 2b", // instruction length
+                ".8byte 3b",      // fix jump address
+                ".8byte 0x3",     // type: load
                 ".popsection",
                 inout("a0") total => error_bytes,
                 inout("a1") self.ptr => _,
@@ -203,11 +202,10 @@ impl CheckedUserPointer<'_> {
                 "3:",
                 "nop",
                 ".pushsection .fix, \"a\", @progbits",
-                ".align 16",
-                ".quad 2b",  // instruction address
-                ".quad 3b - 2b",  // instruction length
-                ".quad 3b",  // fix jump address
-                ".quad 0x1", // type: store
+                ".8byte 2b",  // instruction address
+                ".8byte 3b - 2b",  // instruction length
+                ".8byte 3b",  // fix jump address
+                ".8byte 0x1", // type: store
                 ".popsection",
                 inout("a0") total => error_bytes,
                 inout("a1") data => _,
@@ -263,11 +261,10 @@ impl CheckedUserPointer<'_> {
                 "3:",
                 "nop",
                 ".pushsection .fix, \"a\", @progbits",
-                ".align 16",
-                ".quad 2b",  // instruction address
-                ".quad 3b - 2b",  // instruction length
-                ".quad 3b",  // fix jump address
-                ".quad 0x1", // type: store
+                ".8byte 2b",  // instruction address
+                ".8byte 3b - 2b",  // instruction length
+                ".8byte 3b",  // fix jump address
+                ".8byte 0x1", // type: store
                 ".popsection",
                 inout("a0") self.len => error_bytes,
                 inout("a1") self.ptr => _,
@@ -370,11 +367,10 @@ impl<'lt> UserString<'lt> {
                 "3:",
                 "nop",
                 ".pushsection .fix, \"a\", @progbits",
-                ".align 16",
-                ".quad 2b",  // instruction address
-                ".quad 4b - 2b",  // instruction length
-                ".quad 3b",  // fix jump address
-                ".quad 0x2", // type: string
+                ".8byte 2b",  // instruction address
+                ".8byte 4b - 2b",  // instruction length
+                ".8byte 3b",  // fix jump address
+                ".8byte 0x2", // type: string
                 ".popsection",
                 out("t0") _,
                 inout("a0") MAX_LEN => result,

+ 135 - 61
src/kernel/vfs/dentry.rs

@@ -1,7 +1,7 @@
 pub mod dcache;
 
 use super::{
-    inode::{Ino, Inode, Mode, WriteOffset},
+    inode::{Ino, Inode, Mode, RenameData, WriteOffset},
     s_isblk, s_ischr, s_isdir, s_isreg, DevId, FsContext,
 };
 use crate::{
@@ -10,21 +10,22 @@ use crate::{
     kernel::{block::BlockDevice, CharDevice},
     path::{Path, PathComponent},
     prelude::*,
-    rcu::{RCUNode, RCUPointer},
+    rcu::{RCUNode, RCUPointer, RCUReadGuard},
 };
 use crate::{
     io::Stream,
-    kernel::constants::{EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE},
+    kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE},
 };
-use alloc::sync::Arc;
+use alloc::sync::{Arc, Weak};
 use core::{
     fmt,
     hash::{BuildHasher, BuildHasherDefault, Hasher},
     ops::ControlFlow,
-    sync::atomic::{AtomicPtr, Ordering},
+    sync::atomic::{AtomicPtr, AtomicU64, Ordering},
 };
 use eonix_sync::LazyLock;
-use posix_types::{open::OpenFlags, stat::StatX};
+use pointers::BorrowedArc;
+use posix_types::{namei::RenameFlags, open::OpenFlags, result::PosixError, stat::StatX};
 
 struct DentryData {
     inode: Arc<dyn Inode>,
@@ -39,9 +40,9 @@ struct DentryData {
 /// the last reference is dropped.
 pub struct Dentry {
     // Const after insertion into dcache
-    parent: Arc<Dentry>,
-    name: Arc<[u8]>,
-    hash: u64,
+    parent: RCUPointer<Dentry>,
+    name: RCUPointer<Arc<[u8]>>,
+    hash: AtomicU64,
 
     // Used by the dentry cache
     prev: AtomicPtr<Dentry>,
@@ -51,27 +52,29 @@ pub struct Dentry {
     data: RCUPointer<DentryData>,
 }
 
-pub(super) static DROOT: LazyLock<Arc<Dentry>> = LazyLock::new(|| unsafe {
-    let mut dentry = Arc::new_uninit();
-    let parent = dentry.clone().assume_init();
-
-    Arc::get_mut_unchecked(&mut dentry).write(Dentry {
-        parent,
-        name: Arc::from("[root]".as_ref()),
-        hash: 0,
+pub(super) static DROOT: LazyLock<Arc<Dentry>> = LazyLock::new(|| {
+    let root = Arc::new(Dentry {
+        parent: RCUPointer::empty(),
+        name: RCUPointer::new(Arc::new(Arc::from(&b"[root]"[..]))),
+        hash: AtomicU64::new(0),
         prev: AtomicPtr::default(),
         next: AtomicPtr::default(),
         data: RCUPointer::empty(),
     });
 
-    dentry.assume_init()
+    unsafe {
+        root.parent.swap(Some(root.clone()));
+    }
+
+    root.rehash();
+
+    root
 });
 
 impl fmt::Debug for Dentry {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("Dentry")
-            .field("name", &String::from_utf8_lossy(&self.name))
-            .field("parent", &String::from_utf8_lossy(&self.parent.name))
+            .field("name", &String::from_utf8_lossy(&self.name()))
             .finish()
     }
 }
@@ -93,14 +96,24 @@ impl RCUNode<Dentry> for Dentry {
 }
 
 impl Dentry {
-    fn rehash(self: &Arc<Self>) -> u64 {
+    /// Report whether the `prev` link currently holds a non-null pointer.
+    fn is_hashed(&self) -> bool {
+        let prev = self.prev.load(Ordering::Relaxed);
+        !prev.is_null()
+    }
+
+    fn rehash(&self) {
+        assert!(
+            !self.is_hashed(),
+            "`rehash()` called on some already hashed dentry"
+        );
+
         let builder: BuildHasherDefault<KernelHasher> = Default::default();
         let mut hasher = builder.build_hasher();
 
         hasher.write_usize(self.parent_addr() as usize);
-        hasher.write(self.name.as_ref());
+        hasher.write(&self.name());
+        let hash = hasher.finish();
 
-        hasher.finish()
+        self.hash.store(hash, Ordering::Relaxed);
     }
 
     fn find(self: &Arc<Self>, name: &[u8]) -> KResult<Arc<Self>> {
@@ -113,15 +126,25 @@ impl Dentry {
 
         match name {
             b"." => Ok(self.clone()),
-            b".." => Ok(self.parent.clone()),
+            b".." => Ok(self.parent().clone()),
             _ => {
                 let dentry = Dentry::create(self.clone(), name);
-                Ok(dcache::d_find_fast(&dentry).unwrap_or_else(|| {
-                    dcache::d_try_revalidate(&dentry);
-                    dcache::d_add(&dentry);
 
-                    dentry
-                }))
+                if let Some(found) = dcache::d_find_fast(&dentry) {
+                    unsafe {
+                        // SAFETY: This is safe because the dentry is never shared with
+                        //         others so we can drop them safely.
+                        let _ = dentry.name.swap(None);
+                        let _ = dentry.parent.swap(None);
+                    }
+
+                    return Ok(found);
+                }
+
+                dcache::d_try_revalidate(&dentry);
+                dcache::d_add(dentry.clone());
+
+                Ok(dentry)
             }
         }
     }
@@ -129,40 +152,44 @@ impl Dentry {
 
 impl Dentry {
     pub fn create(parent: Arc<Dentry>, name: &[u8]) -> Arc<Self> {
-        let mut val = Arc::new(Self {
-            parent,
-            name: Arc::from(name),
-            hash: 0,
+        let val = Arc::new(Self {
+            parent: RCUPointer::new(parent),
+            name: RCUPointer::new(Arc::new(Arc::from(name))),
+            hash: AtomicU64::new(0),
             prev: AtomicPtr::default(),
             next: AtomicPtr::default(),
             data: RCUPointer::empty(),
         });
-        let hash = val.rehash();
-        let val_mut = Arc::get_mut(&mut val).unwrap();
-        val_mut.hash = hash;
 
+        val.rehash();
         val
     }
 
     /// Check the equality of two dentries inside the same dentry cache hash group
     /// where `other` is identified by `hash`, `parent` and `name`
     ///
-    fn hash_eq(self: &Arc<Self>, other: &Arc<Self>) -> bool {
-        self.hash == other.hash
+    fn hash_eq(&self, other: &Self) -> bool {
+        self.hash.load(Ordering::Relaxed) == other.hash.load(Ordering::Relaxed)
             && self.parent_addr() == other.parent_addr()
-            && self.name == other.name
+            && &***self.name() == &***other.name()
     }
 
-    pub fn name(&self) -> &Arc<[u8]> {
-        &self.name
+    pub fn name(&self) -> RCUReadGuard<BorrowedArc<Arc<[u8]>>> {
+        self.name.load().expect("Dentry has no name")
     }
 
-    pub fn parent(&self) -> &Arc<Self> {
-        &self.parent
+    pub fn get_name(&self) -> Arc<[u8]> {
+        (***self.name()).clone()
+    }
+
+    pub fn parent<'a>(&self) -> RCUReadGuard<'a, BorrowedArc<Dentry>> {
+        self.parent.load().expect("Dentry has no parent")
     }
 
     pub fn parent_addr(&self) -> *const Self {
-        Arc::as_ptr(&self.parent)
+        self.parent
+            .load()
+            .map_or(core::ptr::null(), |parent| Arc::as_ptr(&parent))
     }
 
     fn save_data(&self, inode: Arc<dyn Inode>, flags: u64) -> KResult<()> {
@@ -251,7 +278,8 @@ impl Dentry {
                 data.inode.readlink(&mut buffer)?;
                 let path = Path::new(buffer.data())?;
 
-                let dentry = Self::open_recursive(context, &dentry.parent, path, true, nrecur + 1)?;
+                let dentry =
+                    Self::open_recursive(context, &dentry.parent(), path, true, nrecur + 1)?;
 
                 Self::resolve_directory(context, dentry, nrecur + 1)
             }
@@ -291,7 +319,8 @@ impl Dentry {
                 PathComponent::TrailingEmpty | PathComponent::Current => {} // pass
                 PathComponent::Parent => {
                     if !cwd.hash_eq(&context.fsroot) {
-                        cwd = Self::resolve_directory(context, cwd.parent.clone(), nrecur)?;
+                        let parent = cwd.parent().clone();
+                        cwd = Self::resolve_directory(context, parent, nrecur)?;
                     }
                     continue;
                 }
@@ -314,7 +343,8 @@ impl Dentry {
                     data.inode.readlink(&mut buffer)?;
                     let path = Path::new(buffer.data())?;
 
-                    cwd = Self::open_recursive(context, &cwd.parent, path, true, nrecur + 1)?;
+                    let parent = cwd.parent().clone();
+                    cwd = Self::open_recursive(context, &parent, path, true, nrecur + 1)?;
                 }
             }
         }
@@ -341,19 +371,26 @@ impl Dentry {
         context: &FsContext,
         buffer: &mut dyn Buffer,
     ) -> KResult<()> {
-        let mut dentry = self;
-        let root = &context.fsroot;
+        let locked_parent = self.parent();
+
+        let path = {
+            let mut path = vec![];
 
-        let mut path = vec![];
+            let mut parent = locked_parent.borrow();
+            let mut dentry = BorrowedArc::new(self);
 
-        while Arc::as_ptr(dentry) != Arc::as_ptr(root) {
-            if path.len() > 32 {
-                return Err(ELOOP);
+            while Arc::as_ptr(&dentry) != Arc::as_ptr(&context.fsroot) {
+                if path.len() > 32 {
+                    return Err(ELOOP);
+                }
+
+                path.push(dentry.name().clone());
+                dentry = parent;
+                parent = dentry.parent.load_protected(&locked_parent).unwrap();
             }
 
-            path.push(dentry.name().clone());
-            dentry = dentry.parent();
-        }
+            path
+        };
 
         buffer.fill(b"/")?.ok_or(ERANGE)?;
         for item in path.iter().rev().map(|name| name.as_ref()) {
@@ -403,14 +440,16 @@ impl Dentry {
     where
         F: FnMut(&[u8], Ino) -> KResult<ControlFlow<(), ()>>,
     {
-        self.get_inode()?.do_readdir(offset, &mut callback)
+        let dir = self.get_inode()?;
+        dir.do_readdir(offset, &mut callback)
     }
 
     pub fn mkdir(&self, mode: Mode) -> KResult<()> {
         if self.get_inode().is_ok() {
             Err(EEXIST)
         } else {
-            self.parent.get_inode().unwrap().mkdir(self, mode)
+            let dir = self.parent().get_inode()?;
+            dir.mkdir(self, mode)
         }
     }
 
@@ -426,7 +465,8 @@ impl Dentry {
         if self.get_inode().is_err() {
             Err(ENOENT)
         } else {
-            self.parent.get_inode().unwrap().unlink(self)
+            let dir = self.parent().get_inode()?;
+            dir.unlink(self)
         }
     }
 
@@ -434,7 +474,8 @@ impl Dentry {
         if self.get_inode().is_ok() {
             Err(EEXIST)
         } else {
-            self.parent.get_inode().unwrap().symlink(self, link)
+            let dir = self.parent().get_inode()?;
+            dir.symlink(self, link)
         }
     }
 
@@ -446,11 +487,44 @@ impl Dentry {
         if self.get_inode().is_ok() {
             Err(EEXIST)
         } else {
-            self.parent.get_inode().unwrap().mknod(self, mode, devid)
+            let dir = self.parent().get_inode()?;
+            dir.mknod(self, mode, devid)
         }
     }
 
     pub fn chmod(&self, mode: Mode) -> KResult<()> {
         self.get_inode()?.chmod(mode)
     }
+
+    /// Forward a change of owner (`uid`) and group (`gid`) to this dentry's
+    /// inode; fails if the dentry has no inode.
+    pub fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        let inode = self.get_inode()?;
+        inode.chown(uid, gid)
+    }
+
+    /// Rename this dentry to `new`, delegating to the inode of this dentry's
+    /// parent directory.
+    ///
+    /// Renaming a dentry onto itself succeeds without doing anything. If the
+    /// two parent inodes do not share the same `vfs`, `EXDEV` is returned; if
+    /// the `vfs` can no longer be upgraded, `EIO` is returned.
+    pub fn rename(self: &Arc<Self>, new: &Arc<Self>, flags: RenameFlags) -> KResult<()> {
+        if Arc::ptr_eq(self, new) {
+            return Ok(());
+        }
+
+        let src_dir = self.parent().get_inode()?;
+        let dst_dir = new.parent().get_inode()?;
+
+        // Both parent directories must belong to the same filesystem.
+        let same_fs = Weak::ptr_eq(&src_dir.vfs, &dst_dir.vfs);
+        if !same_fs {
+            return Err(PosixError::EXDEV.into());
+        }
+
+        let vfs = src_dir.vfs.upgrade().ok_or(EIO)?;
+
+        let is_exchange = flags.contains(RenameFlags::RENAME_EXCHANGE);
+        let no_replace = flags.contains(RenameFlags::RENAME_NOREPLACE);
+
+        // The source parent directory performs the actual rename.
+        src_dir.rename(RenameData {
+            old_dentry: self,
+            new_dentry: new,
+            new_parent: dst_dir,
+            vfs,
+            is_exchange,
+            no_replace,
+        })
+    }
 }

+ 39 - 10
src/kernel/vfs/dentry/dcache.rs

@@ -1,5 +1,6 @@
 use super::{Dentry, Inode};
 use crate::kernel::constants::ENOENT;
+use crate::rcu::RCUPointer;
 use crate::{
     kernel::vfs::{s_isdir, s_islnk},
     prelude::*,
@@ -7,28 +8,32 @@ use crate::{
 };
 use alloc::sync::Arc;
 use core::sync::atomic::Ordering;
+use eonix_runtime::task::Task;
+use eonix_sync::Mutex;
 
 const DCACHE_HASH_BITS: u32 = 8;
 
 static DCACHE: [RCUList<Dentry>; 1 << DCACHE_HASH_BITS] =
     [const { RCUList::new() }; 1 << DCACHE_HASH_BITS];
 
-pub fn d_hinted(hash: u64) -> &'static RCUList<Dentry> {
-    let hash = hash as usize & ((1 << DCACHE_HASH_BITS) - 1);
+static D_EXCHANGE_LOCK: Mutex<()> = Mutex::new(());
+
+pub fn d_hinted(dentry: &Dentry) -> &'static RCUList<Dentry> {
+    let hash = dentry.hash.load(Ordering::Relaxed) as usize & ((1 << DCACHE_HASH_BITS) - 1);
     &DCACHE[hash]
 }
 
-pub fn d_iter_for(hash: u64) -> RCUIterator<'static, Dentry> {
-    d_hinted(hash).iter()
+pub fn d_iter_for(dentry: &Dentry) -> RCUIterator<'static, Dentry> {
+    d_hinted(dentry).iter()
 }
 
 /// Add the dentry to the dcache
-pub fn d_add(dentry: &Arc<Dentry>) {
-    d_hinted(dentry.hash).insert(dentry.clone());
+pub fn d_add(dentry: Arc<Dentry>) {
+    d_hinted(&dentry).insert(dentry);
 }
 
-pub fn d_find_fast(dentry: &Arc<Dentry>) -> Option<Arc<Dentry>> {
-    d_iter_for(dentry.rehash())
+pub fn d_find_fast(dentry: &Dentry) -> Option<Arc<Dentry>> {
+    d_iter_for(dentry)
         .find(|cur| cur.hash_eq(dentry))
         .map(|dentry| dentry.clone())
 }
@@ -37,6 +42,8 @@ pub fn d_find_fast(dentry: &Arc<Dentry>) -> Option<Arc<Dentry>> {
 ///
 /// Silently fail without any side effects
 pub fn d_try_revalidate(dentry: &Arc<Dentry>) {
+    let _lock = Task::block_on(D_EXCHANGE_LOCK.lock());
+
     (|| -> KResult<()> {
         let parent = dentry.parent().get_inode()?;
         let inode = parent.lookup(dentry)?.ok_or(ENOENT)?;
@@ -59,10 +66,32 @@ pub fn d_save(dentry: &Arc<Dentry>, inode: Arc<dyn Inode>) -> KResult<()> {
 
 /// Replace the old dentry with the new one in the dcache
 pub fn d_replace(old: &Arc<Dentry>, new: Arc<Dentry>) {
-    d_hinted(old.hash).replace(old, new);
+    d_hinted(old).replace(old, new);
 }
 
 /// Remove the dentry from the dcache so that later d_find_fast will fail
 pub fn d_remove(dentry: &Arc<Dentry>) {
-    d_hinted(dentry.hash).remove(&dentry);
+    d_hinted(dentry).remove(&dentry);
+}
+
+/// Swap the `parent` and `name` of two dentries and re-insert both into the
+/// dcache.
+///
+/// Does nothing when `old` and `new` are the same `Arc`. Otherwise the whole
+/// operation runs under `D_EXCHANGE_LOCK`, the same lock `d_try_revalidate`
+/// takes.
+pub async fn d_exchange(old: &Arc<Dentry>, new: &Arc<Dentry>) {
+    if Arc::ptr_eq(old, new) {
+        return;
+    }
+
+    let _lock = D_EXCHANGE_LOCK.lock().await;
+
+    // Unlink both entries first: the bucket is chosen from the stored hash
+    // (see `d_hinted`), so they must leave their current buckets before the
+    // hash is recomputed below.
+    d_remove(old);
+    d_remove(new);
+
+    // NOTE(review): `RCUPointer::exchange` requires that nobody else alters
+    // these pointers concurrently. `D_EXCHANGE_LOCK` serializes calls to this
+    // function; confirm no other writer touches `parent`/`name` without it.
+    unsafe {
+        RCUPointer::exchange(&old.parent, &new.parent);
+        RCUPointer::exchange(&old.name, &new.name);
+    }
+
+    // Presumably recomputes each hash from the swapped parent/name — confirm
+    // against `Dentry::rehash`.
+    old.rehash();
+    new.rehash();
+
+    d_add(old.clone());
+    d_add(new.clone());
 }

+ 129 - 42
src/kernel/vfs/file.rs

@@ -1,7 +1,7 @@
 use super::{
     dentry::Dentry,
     inode::{Mode, WriteOffset},
-    s_isblk, s_isdir, s_isreg,
+    s_isblk, s_isreg,
 };
 use crate::{
     io::{Buffer, BufferFill, ByteBuffer, Chunks, IntoStream},
@@ -24,10 +24,13 @@ use crate::{
 };
 use alloc::{collections::vec_deque::VecDeque, sync::Arc};
 use bitflags::bitflags;
-use core::{ops::ControlFlow, sync::atomic::Ordering};
+use core::{
+    ops::{ControlFlow, Deref},
+    sync::atomic::{AtomicU32, Ordering},
+};
 use eonix_runtime::task::Task;
 use eonix_sync::Mutex;
-use posix_types::{signal::Signal, stat::StatX};
+use posix_types::{open::OpenFlags, signal::Signal, stat::StatX};
 
 pub struct InodeFile {
     read: bool,
@@ -70,7 +73,7 @@ pub struct TerminalFile {
 //       `Clone` semantics.
 //
 //       e.g. The `CharDevice` itself is stateless.
-pub enum File {
+pub enum FileType {
     Inode(InodeFile),
     PipeRead(PipeReadEnd),
     PipeWrite(PipeWriteEnd),
@@ -78,6 +81,11 @@ pub enum File {
     CharDev(Arc<CharDevice>),
 }
 
+/// An open file: a `FileType` backend together with its open flags.
+pub struct File {
+    /// Raw `OpenFlags` bits, read by `get_flags` and replaced by `set_flags`.
+    flags: AtomicU32,
+    /// The backend that implements the file operations; reachable through
+    /// `Deref`.
+    file_type: FileType,
+}
+
 pub enum SeekOption {
     Set(usize),
     Current(isize),
@@ -87,6 +95,7 @@ pub enum SeekOption {
 bitflags! {
     pub struct PollEvent: u16 {
         const Readable = 0x0001;
+        const Writable = 0x0002;
     }
 }
 
@@ -110,8 +119,10 @@ fn send_sigpipe_to_current() {
 impl Pipe {
     const PIPE_SIZE: usize = 4096;
 
-    pub fn new() -> Arc<Self> {
-        Arc::new(Self {
+    /// # Return
+    /// `(read_end, write_end)`
+    pub fn new(flags: OpenFlags) -> (Arc<File>, Arc<File>) {
+        let pipe = Arc::new(Self {
             inner: Mutex::new(PipeInner {
                 buffer: VecDeque::with_capacity(Self::PIPE_SIZE),
                 read_closed: false,
@@ -119,15 +130,21 @@ impl Pipe {
             }),
             cv_read: CondVar::new(),
             cv_write: CondVar::new(),
-        })
-    }
+        });
+
+        let read_flags = flags.difference(OpenFlags::O_WRONLY | OpenFlags::O_RDWR);
+        let mut write_flags = read_flags;
+        write_flags.insert(OpenFlags::O_WRONLY);
 
-    /// # Return
-    /// `(read_end, write_end)`
-    pub fn split(self: &Arc<Self>) -> (Arc<File>, Arc<File>) {
         (
-            Arc::new(File::PipeRead(PipeReadEnd { pipe: self.clone() })),
-            Arc::new(File::PipeWrite(PipeWriteEnd { pipe: self.clone() })),
+            Arc::new(File {
+                flags: AtomicU32::new(read_flags.bits()),
+                file_type: FileType::PipeRead(PipeReadEnd { pipe: pipe.clone() }),
+            }),
+            Arc::new(File {
+                flags: AtomicU32::new(write_flags.bits()),
+                file_type: FileType::PipeWrite(PipeWriteEnd { pipe }),
+            }),
         )
     }
 
@@ -151,6 +168,32 @@ impl Pipe {
         self.cv_read.notify_all();
     }
 
+    /// Wait until the pipe has buffered data or its write end is closed, then
+    /// report the pipe's state.
+    ///
+    /// Only requests that include `PollEvent::Readable` are handled; any other
+    /// request reaches `unimplemented!` and panics.
+    ///
+    /// # Return
+    /// - `Err(EINTR)` if the current thread has a pending signal once the wait
+    ///   is over.
+    /// - Otherwise a set holding `Readable` when the buffer is non-empty and
+    ///   `Writable` when the write end has been closed.
+    ///
+    /// NOTE(review): `FileType::poll` routes both pipe ends here, so polling a
+    /// write end for `Writable` alone panics — confirm that is intended.
+    async fn poll(&self, event: PollEvent) -> KResult<PollEvent> {
+        if !event.contains(PollEvent::Readable) {
+            unimplemented!("Poll event not supported.");
+        }
+
+        // Sleep on `cv_read` until there is something to read or no writer is
+        // left to produce data.
+        let mut inner = self.inner.lock().await;
+        while inner.buffer.is_empty() && !inner.write_closed {
+            inner = self.cv_read.wait(inner).await;
+        }
+
+        // The signal check happens only after the wait loop has finished.
+        // NOTE(review): whether a signal can wake `cv_read` is not visible
+        // here — verify that a blocked poll is actually interruptible.
+        if Thread::current().signal_list.has_pending_signal() {
+            return Err(EINTR);
+        }
+
+        let mut retval = PollEvent::empty();
+        // NOTE(review): a closed write end is reported as `Writable`;
+        // presumably a stand-in for a hang-up event, which `PollEvent` does
+        // not define — confirm.
+        if inner.write_closed {
+            retval |= PollEvent::Writable;
+        }
+
+        if !inner.buffer.is_empty() {
+            retval |= PollEvent::Readable;
+        }
+
+        Ok(retval)
+    }
+
     async fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
         let mut inner = self.inner.lock().await;
 
@@ -239,7 +282,7 @@ struct UserDirent {
 }
 
 impl InodeFile {
-    pub fn new(dentry: Arc<Dentry>, rwa: (bool, bool, bool)) -> Arc<File> {
+    pub fn new(dentry: Arc<Dentry>, flags: OpenFlags) -> Arc<File> {
         // SAFETY: `dentry` used to create `InodeFile` is valid.
         // SAFETY: `mode` should never change with respect to the `S_IFMT` fields.
         let cached_mode = dentry
@@ -249,14 +292,19 @@ impl InodeFile {
             .load(Ordering::Relaxed)
             & S_IFMT;
 
-        Arc::new(File::Inode(InodeFile {
-            dentry,
-            read: rwa.0,
-            write: rwa.1,
-            append: rwa.2,
-            mode: cached_mode,
-            cursor: Mutex::new(0),
-        }))
+        let (read, write, append) = flags.as_rwa();
+
+        Arc::new(File {
+            flags: AtomicU32::new(flags.bits()),
+            file_type: FileType::Inode(InodeFile {
+                dentry,
+                read,
+                write,
+                append,
+                mode: cached_mode,
+                cursor: Mutex::new(0),
+            }),
+        })
     }
 
     fn seek(&self, option: SeekOption) -> KResult<usize> {
@@ -369,8 +417,11 @@ impl InodeFile {
 }
 
 impl TerminalFile {
-    pub fn new(tty: Arc<Terminal>) -> Arc<File> {
-        Arc::new(File::TTY(TerminalFile { terminal: tty }))
+    pub fn new(tty: Arc<Terminal>, flags: OpenFlags) -> Arc<File> {
+        Arc::new(File {
+            flags: AtomicU32::new(flags.bits()),
+            file_type: FileType::TTY(TerminalFile { terminal: tty }),
+        })
     }
 
     async fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
@@ -404,13 +455,13 @@ impl TerminalFile {
     }
 }
 
-impl File {
+impl FileType {
     pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
         match self {
-            File::Inode(inode) => inode.read(buffer),
-            File::PipeRead(pipe) => pipe.pipe.read(buffer).await,
-            File::TTY(tty) => tty.read(buffer).await,
-            File::CharDev(device) => device.read(buffer),
+            FileType::Inode(inode) => inode.read(buffer),
+            FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await,
+            FileType::TTY(tty) => tty.read(buffer).await,
+            FileType::CharDev(device) => device.read(buffer),
             _ => Err(EBADF),
         }
     }
@@ -432,31 +483,31 @@ impl File {
 
     pub async fn write(&self, stream: &mut dyn Stream) -> KResult<usize> {
         match self {
-            File::Inode(inode) => inode.write(stream),
-            File::PipeWrite(pipe) => pipe.pipe.write(stream).await,
-            File::TTY(tty) => tty.write(stream),
-            File::CharDev(device) => device.write(stream),
+            FileType::Inode(inode) => inode.write(stream),
+            FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await,
+            FileType::TTY(tty) => tty.write(stream),
+            FileType::CharDev(device) => device.write(stream),
             _ => Err(EBADF),
         }
     }
 
     pub fn seek(&self, option: SeekOption) -> KResult<usize> {
         match self {
-            File::Inode(inode) => inode.seek(option),
+            FileType::Inode(inode) => inode.seek(option),
             _ => Err(ESPIPE),
         }
     }
 
     pub fn getdents(&self, buffer: &mut dyn Buffer) -> KResult<()> {
         match self {
-            File::Inode(inode) => inode.getdents(buffer),
+            FileType::Inode(inode) => inode.getdents(buffer),
             _ => Err(ENOTDIR),
         }
     }
 
     pub fn getdents64(&self, buffer: &mut dyn Buffer) -> KResult<()> {
         match self {
-            File::Inode(inode) => inode.getdents64(buffer),
+            FileType::Inode(inode) => inode.getdents64(buffer),
             _ => Err(ENOTDIR),
         }
     }
@@ -467,7 +518,7 @@ impl File {
         let buffer = unsafe { buffer_page.as_memblk().as_bytes_mut() };
 
         match self {
-            File::Inode(file) if s_isblk(file.mode) || s_isreg(file.mode) => (),
+            FileType::Inode(file) if s_isblk(file.mode) || s_isreg(file.mode) => (),
             _ => return Err(EINVAL),
         }
 
@@ -494,30 +545,66 @@ impl File {
 
     pub fn ioctl(&self, request: usize, arg3: usize) -> KResult<usize> {
         match self {
-            File::TTY(tty) => tty.ioctl(request, arg3).map(|_| 0),
+            FileType::TTY(tty) => tty.ioctl(request, arg3).map(|_| 0),
             _ => Err(ENOTTY),
         }
     }
 
     pub async fn poll(&self, event: PollEvent) -> KResult<PollEvent> {
         match self {
-            File::Inode(_) => Ok(event),
-            File::TTY(tty) => tty.poll(event).await,
+            FileType::Inode(_) => Ok(event),
+            FileType::TTY(tty) => tty.poll(event).await,
+            FileType::PipeRead(PipeReadEnd { pipe })
+            | FileType::PipeWrite(PipeWriteEnd { pipe }) => pipe.poll(event).await,
             _ => unimplemented!("Poll event not supported."),
         }
     }
 
     pub fn statx(&self, buffer: &mut StatX, mask: u32) -> KResult<()> {
         match self {
-            File::Inode(inode) => inode.dentry.statx(buffer, mask),
+            FileType::Inode(inode) => inode.dentry.statx(buffer, mask),
             _ => Err(EBADF),
         }
     }
 
     pub fn as_path(&self) -> Option<&Arc<Dentry>> {
         match self {
-            File::Inode(inode_file) if s_isdir(inode_file.mode) => Some(&inode_file.dentry),
+            FileType::Inode(inode_file) => Some(&inode_file.dentry),
             _ => None,
         }
     }
 }
+
+impl File {
+    /// Wrap `file_type` in a reference-counted `File` whose flags start out as
+    /// `flags`.
+    pub fn new(flags: OpenFlags, file_type: FileType) -> Arc<Self> {
+        Arc::new(Self {
+            flags: AtomicU32::new(flags.bits()),
+            file_type,
+        })
+    }
+
+    /// Return the flags currently stored for this file.
+    pub fn get_flags(&self) -> OpenFlags {
+        OpenFlags::from_bits_retain(self.flags.load(Ordering::Relaxed))
+    }
+
+    /// Replace the file status flags with those in `flags`.
+    ///
+    /// Access-mode bits (`O_WRONLY`, `O_RDWR`) and creation bits (`O_CREAT`,
+    /// `O_TRUNC`, `O_EXCL`) in `flags` are ignored. The access mode that is
+    /// already stored is preserved, so a later `get_flags` still reports how
+    /// the file was opened; stored creation bits are dropped.
+    pub fn set_flags(&self, flags: OpenFlags) {
+        let access_mode = (OpenFlags::O_WRONLY | OpenFlags::O_RDWR).bits();
+
+        let requested = flags
+            .difference(
+                OpenFlags::O_WRONLY
+                    | OpenFlags::O_RDWR
+                    | OpenFlags::O_CREAT
+                    | OpenFlags::O_TRUNC
+                    | OpenFlags::O_EXCL,
+                // | OpenFlags::O_NOCTTY,
+            )
+            .bits();
+
+        // Merge atomically so the stored access mode survives the update even
+        // if another `set_flags` runs concurrently. The closure always returns
+        // `Some`, so the update cannot fail.
+        let _ = self
+            .flags
+            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
+                Some((current & access_mode) | requested)
+            });
+    }
+}
+
+/// Lets a `File` be used wherever a `FileType` method is called, so
+/// `FileType` operations can be invoked directly on a `File`.
+impl Deref for File {
+    type Target = FileType;
+
+    /// Borrow the wrapped `FileType`.
+    fn deref(&self) -> &Self::Target {
+        &self.file_type
+    }
+}

+ 22 - 11
src/kernel/vfs/filearray.rs

@@ -4,7 +4,9 @@ use super::{
     s_ischr, Spin,
 };
 use crate::kernel::{
-    constants::{EBADF, EISDIR, ENOTDIR, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_SETFD},
+    constants::{
+        EBADF, EISDIR, ENOTDIR, F_DUPFD, F_DUPFD_CLOEXEC, F_GETFD, F_GETFL, F_SETFD, F_SETFL,
+    },
     syscall::{FromSyscallArg, SyscallRetVal},
 };
 use crate::{
@@ -79,9 +81,11 @@ impl FileArray {
     }
 
     pub fn close_all(&self) {
-        let mut inner = self.inner.lock();
-        inner.fd_min_avail = FD(0);
-        inner.files.clear();
+        let _old_files = {
+            let mut inner = self.inner.lock();
+            inner.fd_min_avail = FD(0);
+            core::mem::take(&mut inner.files)
+        };
     }
 
     pub fn close(&self, fd: FD) -> KResult<()> {
@@ -162,8 +166,7 @@ impl FileArray {
 
         let fdflag = flags.as_fd_flags();
 
-        let pipe = Pipe::new();
-        let (read_end, write_end) = pipe.split();
+        let (read_end, write_end) = Pipe::new(flags);
         inner.do_insert(read_fd, fdflag, read_end);
         inner.do_insert(write_fd, fdflag, write_end);
 
@@ -194,9 +197,9 @@ impl FileArray {
 
         let file = if s_ischr(filemode) {
             let device = CharDevice::get(inode.devid()?).ok_or(ENXIO)?;
-            device.open()?
+            device.open(flags)?
         } else {
-            InodeFile::new(dentry.clone(), flags.as_rwa())
+            InodeFile::new(dentry.clone(), flags)
         };
 
         let mut inner = self.inner.lock();
@@ -229,6 +232,14 @@ impl FileArray {
                 ofile.flags = FDFlags::from_bits_truncate(arg as u32);
                 Ok(0)
             }
+            F_GETFL => Ok(ofile.file.get_flags().bits() as usize),
+            F_SETFL => {
+                ofile
+                    .file
+                    .set_flags(OpenFlags::from_bits_retain(arg as u32));
+
+                Ok(0)
+            }
             _ => unimplemented!("fcntl: cmd={}", cmd),
         }
     }
@@ -242,17 +253,17 @@ impl FileArray {
         inner.do_insert(
             stdin,
             FDFlags::FD_CLOEXEC,
-            TerminalFile::new(console_terminal.clone()),
+            TerminalFile::new(console_terminal.clone(), OpenFlags::empty()),
         );
         inner.do_insert(
             stdout,
             FDFlags::FD_CLOEXEC,
-            TerminalFile::new(console_terminal.clone()),
+            TerminalFile::new(console_terminal.clone(), OpenFlags::empty()),
         );
         inner.do_insert(
             stderr,
             FDFlags::FD_CLOEXEC,
-            TerminalFile::new(console_terminal.clone()),
+            TerminalFile::new(console_terminal.clone(), OpenFlags::empty()),
         );
     }
 }

+ 33 - 17
src/kernel/vfs/inode.rs

@@ -1,9 +1,10 @@
-use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId, TimeSpec};
+use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId};
 use crate::io::Stream;
 use crate::kernel::constants::{
     EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO,
     STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT,
 };
+use crate::kernel::timer::Instant;
 use crate::{io::Buffer, prelude::*};
 use alloc::sync::{Arc, Weak};
 use core::{
@@ -14,6 +15,7 @@ use core::{
 };
 use eonix_runtime::task::Task;
 use eonix_sync::RwLock;
+use posix_types::namei::RenameFlags;
 use posix_types::stat::StatX;
 
 pub type Ino = u64;
@@ -42,9 +44,9 @@ pub struct InodeData {
     pub gid: AtomicGid,
     pub mode: AtomicMode,
 
-    pub atime: Spin<TimeSpec>,
-    pub ctime: Spin<TimeSpec>,
-    pub mtime: Spin<TimeSpec>,
+    pub atime: Spin<Instant>,
+    pub ctime: Spin<Instant>,
+    pub mtime: Spin<Instant>,
 
     pub rwsem: RwLock<()>,
 
@@ -56,9 +58,9 @@ impl InodeData {
         Self {
             ino,
             vfs,
-            atime: Spin::new(TimeSpec::default()),
-            ctime: Spin::new(TimeSpec::default()),
-            mtime: Spin::new(TimeSpec::default()),
+            atime: Spin::new(Instant::default()),
+            ctime: Spin::new(Instant::default()),
+            mtime: Spin::new(Instant::default()),
             rwsem: RwLock::new(()),
             size: AtomicU64::new(0),
             nlink: AtomicNlink::new(0),
@@ -82,8 +84,17 @@ pub enum WriteOffset<'end> {
     End(&'end mut usize),
 }
 
+/// Arguments passed to `Inode::rename`.
+pub struct RenameData<'a, 'b> {
+    /// Dentry of the entry being renamed.
+    pub old_dentry: &'a Arc<Dentry>,
+    /// Dentry naming the destination.
+    pub new_dentry: &'b Arc<Dentry>,
+    /// Inode of the directory that holds the destination entry.
+    pub new_parent: Arc<dyn Inode>,
+    /// NOTE(review): presumably the filesystem both entries live on — confirm
+    /// against the caller.
+    pub vfs: Arc<dyn Vfs>,
+    /// NOTE(review): presumably mirrors the `RENAME_EXCHANGE` flag — confirm.
+    pub is_exchange: bool,
+    /// NOTE(review): presumably mirrors the `RENAME_NOREPLACE` flag — confirm.
+    pub no_replace: bool,
+}
+
 #[allow(unused_variables)]
-pub trait Inode: Send + Sync + InodeInner {
+pub trait Inode: Send + Sync + InodeInner + Any {
     fn is_dir(&self) -> bool {
         self.mode.load(Ordering::SeqCst) & S_IFDIR != 0
     }
@@ -132,6 +143,10 @@ pub trait Inode: Send + Sync + InodeInner {
         Err(if self.is_dir() { EISDIR } else { EPERM })
     }
 
+    fn rename(&self, rename_data: RenameData) -> KResult<()> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
+    }
+
     fn do_readdir(
         &self,
         offset: usize,
@@ -144,6 +159,10 @@ pub trait Inode: Send + Sync + InodeInner {
         Err(EPERM)
     }
 
+    fn chown(&self, uid: u32, gid: u32) -> KResult<()> {
+        Err(EPERM)
+    }
+
     fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
         // Safety: ffi should have checked reference
         let vfs = self.vfs.upgrade().expect("Vfs is dropped");
@@ -157,23 +176,20 @@ pub trait Inode: Send + Sync + InodeInner {
         }
 
         if mask & STATX_ATIME != 0 {
-            let atime = self.atime.lock();
-            stat.stx_atime.tv_nsec = atime.nsec as _;
-            stat.stx_atime.tv_sec = atime.sec as _;
+            let atime = *self.atime.lock();
+            stat.stx_atime = atime.into();
             stat.stx_mask |= STATX_ATIME;
         }
 
         if mask & STATX_MTIME != 0 {
-            let mtime = self.mtime.lock();
-            stat.stx_mtime.tv_nsec = mtime.nsec as _;
-            stat.stx_mtime.tv_sec = mtime.sec as _;
+            let mtime = *self.mtime.lock();
+            stat.stx_mtime = mtime.into();
             stat.stx_mask |= STATX_MTIME;
         }
 
         if mask & STATX_CTIME != 0 {
-            let ctime = self.ctime.lock();
-            stat.stx_ctime.tv_nsec = ctime.nsec as _;
-            stat.stx_ctime.tv_sec = ctime.sec as _;
+            let ctime = *self.ctime.lock();
+            stat.stx_ctime = ctime.into();
             stat.stx_mask |= STATX_CTIME;
         }
 

+ 3 - 2
src/kernel/vfs/mount.rs

@@ -36,7 +36,7 @@ pub struct Mount {
 
 impl Mount {
     pub fn new(mp: &Dentry, vfs: Arc<dyn Vfs>, root_inode: Arc<dyn Inode>) -> KResult<Self> {
-        let root_dentry = Dentry::create(mp.parent().clone(), mp.name());
+        let root_dentry = Dentry::create(mp.parent().clone(), &mp.get_name());
         root_dentry.save_dir(root_inode)?;
 
         Ok(Self {
@@ -54,6 +54,7 @@ unsafe impl Send for Mount {}
 unsafe impl Sync for Mount {}
 
 pub trait MountCreator: Send + Sync {
+    fn check_signature(&self, first_block: &[u8]) -> KResult<bool>;
     fn create_mount(&self, source: &str, flags: u64, mp: &Arc<Dentry>) -> KResult<Mount>;
 }
 
@@ -170,7 +171,7 @@ impl Dentry {
 
             let root_dentry = mount.root().clone();
 
-            dcache::d_add(&root_dentry);
+            dcache::d_add(root_dentry.clone());
 
             MOUNTS.lock().push((
                 DROOT.clone(),

+ 2 - 0
src/lib.rs

@@ -139,11 +139,13 @@ async fn init_process(early_kstack: PRange) {
         driver::virtio::init_virtio_devices();
         driver::e1000e::register_e1000e_driver();
         driver::ahci::register_ahci_driver();
+        driver::goldfish_rtc::probe();
     }
 
     fs::tmpfs::init();
     fs::procfs::init();
     fs::fat32::init();
+    fs::ext4::init();
 
     let load_info = {
         // mount fat32 /mnt directory

+ 35 - 3
src/rcu.rs

@@ -17,14 +17,22 @@ pub struct RCUReadGuard<'data, T: 'data> {
 
 static GLOBAL_RCU_SEM: RwLock<()> = RwLock::new(());
 
-impl<'data, T: 'data> RCUReadGuard<'data, T> {
-    fn lock(value: T) -> Self {
+impl<'data, T> RCUReadGuard<'data, BorrowedArc<'data, T>> {
+    fn lock(value: BorrowedArc<'data, T>) -> Self {
         Self {
             value,
             _guard: Task::block_on(GLOBAL_RCU_SEM.read()),
             _phantom: PhantomData,
         }
     }
+
+    /// Produce another `BorrowedArc` for the value this guard protects.
+    ///
+    /// The new handle is rebuilt from the raw address of the guarded value and
+    /// carries the guard's `'data` lifetime.
+    pub fn borrow(&self) -> BorrowedArc<'data, T> {
+        // NOTE(review): relies on `self.value.borrow()` yielding a reference
+        // to the pointee, which is never null — confirm against `BorrowedArc`.
+        unsafe {
+            BorrowedArc::from_raw(NonNull::new_unchecked(
+                &raw const *self.value.borrow() as *mut T
+            ))
+        }
+    }
 }
 
 impl<'data, T: 'data> Deref for RCUReadGuard<'data, T> {
@@ -194,15 +202,26 @@ impl<T: core::fmt::Debug> core::fmt::Debug for RCUPointer<T> {
 }
 
 impl<T> RCUPointer<T> {
-    pub fn empty() -> Self {
+    pub const fn empty() -> Self {
         Self(AtomicPtr::new(core::ptr::null_mut()))
     }
 
+    /// Create a pointer that initially holds `value`.
+    ///
+    /// The `Arc` is converted with `Arc::into_raw`, so its reference is kept
+    /// by the stored raw pointer instead of being released here.
+    pub fn new(value: Arc<T>) -> Self {
+        Self(AtomicPtr::new(Arc::into_raw(value) as *mut T))
+    }
+
     pub fn load<'lt>(&self) -> Option<RCUReadGuard<'lt, BorrowedArc<'lt, T>>> {
         NonNull::new(self.0.load(Ordering::Acquire))
             .map(|p| RCUReadGuard::lock(unsafe { BorrowedArc::from_raw(p) }))
     }
 
+    /// Load the pointer under an RCU read guard the caller already holds.
+    ///
+    /// Unlike `load`, this takes no lock of its own: `_guard` is only used to
+    /// tie the lifetime of the returned `BorrowedArc` to the existing guard.
+    /// Returns `None` when the pointer is null.
+    pub fn load_protected<'a, U: 'a>(
+        &self,
+        _guard: &RCUReadGuard<'a, U>,
+    ) -> Option<BorrowedArc<'a, T>> {
+        NonNull::new(self.0.load(Ordering::Acquire)).map(|p| unsafe { BorrowedArc::from_raw(p) })
+    }
+
     /// # Safety
     /// Caller must ensure no writers are updating the pointer.
     pub unsafe fn load_locked<'lt>(&self) -> Option<BorrowedArc<'lt, T>> {
@@ -224,6 +243,19 @@ impl<T> RCUPointer<T> {
             Some(unsafe { Arc::from_raw(old) })
         }
     }
+
+    /// Exchange the values held by two pointers.
+    ///
+    /// The exchange is made of three separate atomic operations and is not
+    /// atomic as a whole: between the `swap` on `new` and the final `store` on
+    /// `old`, both pointers hold `old`'s original value.
+    ///
+    /// # Safety
+    /// Presence of readers is acceptable.
+    /// But the caller must ensure that we are the only one **altering** the pointers.
+    pub unsafe fn exchange(old: &Self, new: &Self) {
+        let old_value = old.0.load(Ordering::Acquire);
+
+        // Put `old`'s value into `new` and take what `new` held before.
+        let new_value = new.0.swap(old_value, Ordering::AcqRel);
+
+        // A concurrent writer to `old` between the load above and this store
+        // would be overwritten here, hence the single-writer requirement.
+        old.0.store(new_value, Ordering::Release);
+    }
 }
 
 impl<T> Drop for RCUPointer<T> {

+ 9 - 0
user-programs/init_script_riscv64.sh

@@ -25,6 +25,7 @@ do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3
 do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5
 do_or_freeze $BUSYBOX mknod -m 666 /dev/sda b 8 0
 do_or_freeze $BUSYBOX mknod -m 666 /dev/sda1 b 8 1
+do_or_freeze $BUSYBOX mknod -m 666 /dev/sdb b 8 16
 do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64
 do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65
 
@@ -41,6 +42,14 @@ echo ok >&2
 do_or_freeze mkdir -p /etc /root /proc
 do_or_freeze mount -t procfs proc proc
 
+# Check if the device /dev/sdb is available and can be read
+if dd if=/dev/sdb of=/dev/null bs=512 count=1; then
+    echo -n -e "Mounting the ext4 image... " >&2
+    do_or_freeze mkdir -p /mnt1
+    do_or_freeze mount -t ext4 /dev/sdb /mnt1
+    echo ok >&2
+fi
+
 cp /mnt/ld-musl-i386.so.1 /lib/ld-musl-i386.so.1
 ln -s /lib/ld-musl-i386.so.1 /lib/libc.so
 

+ 9 - 0
user-programs/init_script_x86_64.sh

@@ -25,6 +25,7 @@ do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3
 do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5
 do_or_freeze $BUSYBOX mknod -m 666 /dev/sda b 8 0
 do_or_freeze $BUSYBOX mknod -m 666 /dev/sda1 b 8 1
+do_or_freeze $BUSYBOX mknod -m 666 /dev/sdb b 8 16
 do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64
 do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65
 
@@ -41,6 +42,14 @@ echo ok >&2
 do_or_freeze mkdir -p /etc /root /proc
 do_or_freeze mount -t procfs proc proc
 
+# Check if the device /dev/sdb is available and can be read
+if dd if=/dev/sdb of=/dev/null bs=512 count=1; then
+    echo -n -e "Mounting the ext4 image... " >&2
+    do_or_freeze mkdir -p /mnt1
+    do_or_freeze mount -t ext4 /dev/sdb /mnt1
+    echo ok >&2
+fi
+
 cp /mnt/ld-musl-i386.so.1 /lib/ld-musl-i386.so.1
 ln -s /lib/ld-musl-i386.so.1 /lib/libc.so