Преглед на файлове

Merge branch 'fix' into shiai-master

greatbridf преди 3 месеца
родител
ревизия
6f90bfbd38
променени са 57 файла, в които са добавени 1191 реда и са изтрити 618 реда
  1. 1 12
      CMakeLists.txt
  2. 1 1
      Cargo.toml
  3. 13 0
      Makefile.src
  4. 5 4
      configure
  5. 19 0
      init_script.sh
  6. 35 0
      script/build-img.sh
  7. 2 0
      src/driver/ahci/control.rs
  8. 3 1
      src/driver/ahci/defs.rs
  9. 1 0
      src/driver/ahci/mod.rs
  10. 2 0
      src/driver/ahci/port.rs
  11. 10 18
      src/driver/e1000e.rs
  12. 1 0
      src/driver/serial.rs
  13. 45 74
      src/elf.rs
  14. 35 180
      src/fs/fat32.rs
  15. 240 0
      src/fs/fat32/dir.rs
  16. 34 0
      src/fs/fat32/file.rs
  17. 4 1
      src/fs/procfs.rs
  18. 26 0
      src/fs/tmpfs.rs
  19. 1 0
      src/intrusive_list.rs
  20. 22 80
      src/io.rs
  21. 1 0
      src/kernel.rs
  22. 3 15
      src/kernel/block.rs
  23. 1 0
      src/kernel/chardev.rs
  24. 6 0
      src/kernel/constants.rs
  25. 2 2
      src/kernel/mem.rs
  26. 3 3
      src/kernel/mem/address.rs
  27. 93 1
      src/kernel/mem/mm_area.rs
  28. 160 62
      src/kernel/mem/mm_list.rs
  29. 6 78
      src/kernel/mem/mm_list/page_fault.rs
  30. 18 4
      src/kernel/mem/page_table.rs
  31. 11 34
      src/kernel/mem/paging.rs
  32. 1 0
      src/kernel/mem/phys.rs
  33. 3 0
      src/kernel/syscall.rs
  34. 35 15
      src/kernel/syscall/file_rw.rs
  35. 7 0
      src/kernel/syscall/mm.rs
  36. 133 9
      src/kernel/syscall/procops.rs
  37. 65 3
      src/kernel/syscall/sysinfo.rs
  38. 1 0
      src/kernel/task/kstack.rs
  39. 1 1
      src/kernel/task/process.rs
  40. 7 2
      src/kernel/task/process_list.rs
  41. 8 0
      src/kernel/terminal.rs
  42. 1 0
      src/kernel/timer.rs
  43. 1 0
      src/kernel/user.rs
  44. 14 0
      src/kernel/vfs/dentry.rs
  45. 22 6
      src/kernel/vfs/file.rs
  46. 6 3
      src/kernel/vfs/filearray.rs
  47. 4 0
      src/kernel/vfs/inode.rs
  48. 1 0
      src/kernel/vfs/mod.rs
  49. 6 1
      src/kernel/vfs/mount.rs
  50. 1 0
      src/kernel/vfs/vfs.rs
  51. 3 3
      src/lib.rs
  52. 3 5
      src/net/netdev.rs
  53. 9 0
      src/path.rs
  54. 3 0
      src/prelude.rs
  55. 1 0
      src/rcu.rs
  56. 2 0
      src/sync.rs
  57. 49 0
      src/sync/arcswap.rs

+ 1 - 12
CMakeLists.txt

@@ -85,18 +85,7 @@ add_custom_command(OUTPUT mbr_hole.bin
 add_custom_target(boot.img
     DEPENDS mbr_hole.bin
     DEPENDS user_space_programs
-    COMMAND dd if=mbr_hole.bin of=boot.img
-    COMMAND dd if=/dev/zero of=boot.img bs=`expr 512 \\* 1024 \\* 1024` count=0 seek=1
-    COMMAND sh -c \"echo n\; echo\; echo \; echo 8192\; echo\; echo a\; echo w\" | ${FDISK_BIN} boot.img
-    COMMAND mkfs.fat --offset=8192 -v -n SYSTEM boot.img
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/hello-world.out ::hello
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/interrupt-test.out ::int
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/stack-test.out ::stack
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/init.out ::init
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/priv-test.out ::priv
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/busybox-minimal ::busybox_
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/busybox ::busybox
-    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/init_script.sh ::initsh
+    COMMAND make -C ${CMAKE_SOURCE_DIR} image
 )
 
 add_custom_command(OUTPUT run

+ 1 - 1
Cargo.toml

@@ -7,7 +7,7 @@ edition = "2021"
 crate-type = ["staticlib"]
 
 [dependencies]
-arch = { path="./arch" }
+arch = { path = "./arch" }
 bitflags = "2.6.0"
 itertools = { version = "0.13.0", default-features = false }
 lazy_static = { version = "1.5.0", features = ["spin_no_std"] }

+ 13 - 0
Makefile.src

@@ -7,6 +7,8 @@ QEMU_ARGS=-machine q35 -drive id=disk,file=build/boot.img,format=raw,if=none \
 	-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0 -smp 4 \
 	-no-reboot -no-shutdown $(QEMU_ACCELERATION_FLAG) $(QEMU_DEBUG_FLAG)
 
+FDISK_BIN ?= ##PLACEHOLDER_5##
+
 CROSS_COMPILE=##PLACEHOLDER_4##
 .PHONY: run
 run: build
@@ -60,6 +62,17 @@ tmux-debug:
 	-tmux attach -t gbos-debug
 	tmux kill-session -t gbos-debug
 
+build/fs.img: init_script.sh
+	sh script/build-img.sh
+
+build/boot.img: build/fs.img build/mbr_hole.bin
+	dd if=build/mbr_hole.bin of=build/boot.img
+	dd if=build/fs.img of=build/boot.img bs=$(shell expr 4 \* 1024 \* 1024) seek=1 conv=notrunc
+	sh -c 'echo n; echo; echo; echo 8192; echo; echo a; echo w' | $(FDISK_BIN) build/boot.img
+
 build/boot.vdi: build/boot.img
 	-rm build/boot.vdi
 	VBoxManage convertfromraw $< $@ --format VDI
+
+.PHONY: image
+image: build/boot.img

+ 5 - 4
configure

@@ -144,8 +144,9 @@ else
 fi
 
 cp Makefile.src Makefile
-sed -i '' -e "s/##PLACEHOLDER_1##/$QEMU/" Makefile > /dev/null 2>&1
-sed -i '' -e "s/##PLACEHOLDER_2##/$GDB/" Makefile > /dev/null 2>&1
-sed -i '' -e "s/##PLACEHOLDER_3##/$QEMU_ACCEL/" Makefile > /dev/null 2>&1
-sed -i '' -e "s/##PLACEHOLDER_4##/$CROSS_COMPILE_FLAG/" Makefile > /dev/null 2>&1
+sed -i '' -e "s|##PLACEHOLDER_1##|$QEMU|" Makefile > /dev/null 2>&1
+sed -i '' -e "s|##PLACEHOLDER_2##|$GDB|" Makefile > /dev/null 2>&1
+sed -i '' -e "s|##PLACEHOLDER_3##|$QEMU_ACCEL|" Makefile > /dev/null 2>&1
+sed -i '' -e "s|##PLACEHOLDER_4##|$CROSS_COMPILE_FLAG|" Makefile > /dev/null 2>&1
+sed -i '' -e "s|##PLACEHOLDER_5##|$FDISK_BIN|" Makefile > /dev/null 2>&1
 exit 0

+ 19 - 0
init_script.sh

@@ -59,4 +59,23 @@ alias ll="ls -l "
 alias la="ls -la "
 EOF
 
+cat > /root/test.c <<EOF
+#include <stdio.h>
+
+int main() {
+    int var = 0;
+    printf("Hello, world!\n");
+    printf("Please input a number: \n");
+    scanf("%d", &var);
+    if (var > 0) {
+        printf("You typed a positive number.\n");
+    } else if (var == 0 ) {
+        printf("You input a zero.\n");
+    } else {
+        printf("You typed a negative number.\n");
+    }
+    return 0;
+}
+EOF
+
 exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0'

+ 35 - 0
script/build-img.sh

@@ -0,0 +1,35 @@
+#!/bin/sh
+# Create the 512 MiB FAT image build/fs.img and populate it through a temporary mount at build/mnt.
+OS=`uname -s`
+
+dd if=/dev/zero of=build/fs.img bs=`expr 1024 \* 1024` count=512
+mkfs.fat -n SYSTEM build/fs.img
+# Mount the image that was just formatted (hdiutil on macOS, mount(8) elsewhere).
+if [ "$OS" = "Darwin" ]; then
+    hdiutil detach build/mnt > /dev/null 2>&1 || true
+    hdiutil attach build/fs.img -mountpoint build/mnt
+else
+    mkdir -p build/mnt
+    sudo mount build/fs.img build/mnt
+fi
+
+cp build/user-space-program/hello-world.out build/mnt/hello
+cp build/user-space-program/interrupt-test.out build/mnt/int
+cp build/user-space-program/stack-test.out build/mnt/stack
+cp build/user-space-program/init.out build/mnt/init
+cp build/user-space-program/priv-test.out build/mnt/priv
+cp ./busybox build/mnt/busybox
+cp ./busybox-minimal build/mnt/busybox_
+cp ./init_script.sh build/mnt/initsh
+
+# Add your custom files here
+
+cp -r $HOME/.local/i486-linux-musl-cross build/mnt/
+
+# End of custom files
+
+if [ "$OS" = "Darwin" ]; then
+    hdiutil detach build/mnt
+else
+    sudo umount build/mnt
+fi

+ 2 - 0
src/driver/ahci/control.rs

@@ -8,6 +8,7 @@ use super::{BitsIterator, GHC_IE};
 ///
 /// All reads and writes to this struct are volatile
 ///
+#[allow(dead_code)]
 #[repr(C)]
 struct AdapterControlData {
     capabilities: u32,
@@ -26,6 +27,7 @@ struct AdapterControlData {
     vendor: [u8; 96],
 }
 
+#[allow(dead_code)]
 const CONTROL_CAP: usize = 0;
 const CONTROL_GHC: usize = 1;
 const CONTROL_IS: usize = 2;

+ 3 - 1
src/driver/ahci/defs.rs

@@ -240,7 +240,9 @@ impl PRDTEntry {
         self.base = page.as_phys() as u64;
         self._reserved1 = 0;
 
-        self.shared = 0x80000000 | (page.len() as u32 & 0x3fffff);
+        // The byte count is stored zero-based (length - 1); bit 0 MUST be 1 according to the AHCI spec
+        let len = page.len() as u32 - 1;
+        self.shared = 0x80000000 | (len & 0x3fffff);
     }
 }
 

+ 1 - 0
src/driver/ahci/mod.rs

@@ -61,6 +61,7 @@ fn vwrite<T: Sized + Copy>(refval: *mut T, val: T) {
     unsafe { refval.write_volatile(val) }
 }
 
+#[allow(dead_code)]
 struct Device {
     control_base: usize,
     control: AdapterControl,

+ 2 - 0
src/driver/ahci/port.rs

@@ -36,6 +36,7 @@ fn spinwait_clear(refval: *const u32, mask: u32) -> KResult<()> {
 ///
 /// All reads and writes to this struct are volatile
 ///
+#[allow(dead_code)]
 #[repr(C)]
 pub struct AdapterPortData {
     pub command_list_base: u64,
@@ -65,6 +66,7 @@ pub struct AdapterPortData {
     vendor: [u32; 4],
 }
 
+#[allow(dead_code)]
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
 enum SlotState {
     Idle,

+ 10 - 18
src/driver/e1000e.rs

@@ -2,11 +2,11 @@ use crate::prelude::*;
 
 use crate::bindings::root::kernel::hw::pci;
 use crate::kernel::interrupt::register_irq_handler;
-use crate::kernel::mem::paging::copy_to_page;
 use crate::kernel::mem::{paging, phys};
 use crate::net::netdev;
 use alloc::boxed::Box;
 use alloc::vec::Vec;
+use bindings::EFAULT;
 use paging::Page;
 use phys::{NoCachePP, PhysPtr};
 
@@ -105,9 +105,7 @@ impl netdev::Netdev for E1000eDev {
         match status & defs::STAT_SPEED_MASK {
             defs::STAT_SPEED_10M => self.speed = netdev::LinkSpeed::Speed10M,
             defs::STAT_SPEED_100M => self.speed = netdev::LinkSpeed::Speed100M,
-            defs::STAT_SPEED_1000M => {
-                self.speed = netdev::LinkSpeed::Speed1000M
-            }
+            defs::STAT_SPEED_1000M => self.speed = netdev::LinkSpeed::Speed1000M,
             _ => return Err(EINVAL),
         }
 
@@ -163,17 +161,9 @@ impl netdev::Netdev for E1000eDev {
             let len = desc.length as usize;
 
             let buffers = self.rx_buffers.as_mut().ok_or(EIO)?;
-            let data = unsafe {
-                core::slice::from_raw_parts(
-                    buffers[next_tail as usize].as_cached().as_ptr::<u8>(),
-                    len,
-                )
-            };
-
-            println_debug!(
-                "e1000e: received {len} bytes, {:?}",
-                PrintableBytes(data)
-            );
+            let data = &buffers[next_tail as usize].as_slice()[..len];
+
+            println_debug!("e1000e: received {len} bytes, {:?}", PrintableBytes(data));
             self.rx_tail = Some(next_tail);
         }
 
@@ -195,7 +185,10 @@ impl netdev::Netdev for E1000eDev {
         }
 
         let buffer_page = Page::alloc_one();
-        copy_to_page(buf, &buffer_page)?;
+        if buf.len() > buffer_page.len() {
+            return Err(EFAULT);
+        }
+        buffer_page.as_mut_slice()[..buf.len()].copy_from_slice(buf);
 
         desc.buffer = buffer_page.as_phys() as u64;
         desc.length = buf.len() as u16;
@@ -437,8 +430,7 @@ pub fn register_e1000e_driver() {
     let dev_ids = [0x100e, 0x10d3, 0x10ea, 0x153a];
 
     for id in dev_ids.into_iter() {
-        let ret =
-            unsafe { pci::register_driver_r(0x8086, id, Some(probe_device)) };
+        let ret = unsafe { pci::register_driver_r(0x8086, id, Some(probe_device)) };
 
         assert_eq!(ret, 0);
     }

+ 1 - 0
src/driver/serial.rs

@@ -11,6 +11,7 @@ use crate::{
 
 use super::Port8;
 
+#[allow(dead_code)]
 struct Serial {
     id: u32,
     name: Arc<str>,

+ 45 - 74
src/elf.rs

@@ -2,12 +2,10 @@ use alloc::{ffi::CString, sync::Arc};
 use bitflags::bitflags;
 
 use crate::{
-    io::{RawBuffer, UninitBuffer},
+    io::{ByteBuffer, UninitBuffer},
     kernel::{
         constants::ENOEXEC,
         mem::{FileMapping, MMList, Mapping, Permission, VAddr},
-        task::Thread,
-        user::{dataflow::CheckedUserPointer, UserPointerMut},
         vfs::dentry::Dentry,
     },
     prelude::*,
@@ -204,7 +202,7 @@ impl ParsedElf32 {
         let mut header = UninitBuffer::<Elf32Header>::new();
         file.read(&mut header, 0)?;
 
-        let header = header.assume_init().ok_or(ENOEXEC)?;
+        let header = header.assume_init().map_err(|_| ENOEXEC)?;
         if !header.check_valid() {
             return Err(ENOEXEC);
         }
@@ -212,7 +210,7 @@ impl ParsedElf32 {
         // TODO: Use `UninitBuffer` for `phents` and `shents`.
         let mut phents = vec![Elf32PhEntry::default(); header.ph_entry_count as usize];
         let nread = file.read(
-            &mut RawBuffer::new_from_slice(phents.as_mut_slice()),
+            &mut ByteBuffer::from(phents.as_mut_slice()),
             header.ph_offset as usize,
         )?;
         if nread != header.ph_entry_count as usize * size_of::<Elf32PhEntry>() {
@@ -221,7 +219,7 @@ impl ParsedElf32 {
 
         let mut shents = vec![Elf32ShEntry::default(); header.sh_entry_count as usize];
         let nread = file.read(
-            &mut RawBuffer::new_from_slice(shents.as_mut_slice()),
+            &mut ByteBuffer::from(shents.as_mut_slice()),
             header.sh_offset as usize,
         )?;
         if nread != header.sh_entry_count as usize * size_of::<Elf32ShEntry>() {
@@ -236,20 +234,15 @@ impl ParsedElf32 {
         })
     }
 
-    /// Load the ELF file into memory. Return the entry point address.
+    /// Load the ELF file into memory. Return the entry point address and the memory list containing the program data.
     ///
     /// The program headers are loaded into a newly created memory list, which is
     /// returned to the caller together with the entry point and the stack pointer.
     ///
     /// # Return
-    /// `(entry_ip, sp)`
-    pub fn load(
-        self,
-        mm_list: &MMList,
-        args: Vec<CString>,
-        envs: Vec<CString>,
-    ) -> KResult<(VAddr, VAddr)> {
-        mm_list.clear_user();
+    /// `(entry_ip, sp, mm_list)`
+    pub fn load(self, args: Vec<CString>, envs: Vec<CString>) -> KResult<(VAddr, VAddr, MMList)> {
+        let mm_list = MMList::new();
 
         let mut data_segment_end = VAddr(0);
         for phent in self
@@ -312,67 +305,45 @@ impl ParsedElf32 {
             },
         )?;
 
-        // TODO!!!!!: A temporary workaround.
-        mm_list.switch_page_table();
-
-        let mut sp = 0xc0000000u32;
-        let arg_addrs = args
-            .into_iter()
-            .map(|arg| push_string(&mut sp, arg))
-            .collect::<Vec<_>>();
-
-        let env_addrs = envs
-            .into_iter()
-            .map(|env| push_string(&mut sp, env))
-            .collect::<Vec<_>>();
-
-        let longs = 2 // Null auxiliary vector entry
-            + env_addrs.len() + 1 // Envs + null
-            + arg_addrs.len() + 1 // Args + null
-            + 1; // argc
-
-        sp -= longs as u32 * 4;
-        sp &= !0xf; // Align to 16 bytes
-
-        let mut cursor = (0..longs)
-            .map(|idx| UserPointerMut::<u32>::new_vaddr(sp as usize + size_of::<u32>() * idx));
-
-        // argc
-        cursor.next().unwrap()?.write(arg_addrs.len() as u32)?;
-
-        // args
-        for arg_addr in arg_addrs.into_iter() {
-            cursor.next().unwrap()?.write(arg_addr)?;
-        }
-        cursor.next().unwrap()?.write(0)?; // null
-
-        // envs
-        for env_addr in env_addrs.into_iter() {
-            cursor.next().unwrap()?.write(env_addr)?;
-        }
-        cursor.next().unwrap()?.write(0)?; // null
-
-        // Null auxiliary vector
-        cursor.next().unwrap()?.write(0)?; // AT_NULL
-        cursor.next().unwrap()?.write(0)?; // AT_NULL
-
-        // TODO!!!!!: A temporary workaround.
-        Thread::current().process.mm_list.switch_page_table();
-
-        assert!(cursor.next().is_none());
-        Ok((VAddr(self.entry as usize), VAddr(sp as usize)))
+        let mut sp = VAddr::from(0xc0000000); // Current stack top
+        let arg_addrs = push_strings(&mm_list, &mut sp, args)?;
+        let env_addrs = push_strings(&mm_list, &mut sp, envs)?;
+
+        let mut longs = vec![];
+        longs.push(arg_addrs.len() as u32); // argc
+        longs.extend(arg_addrs.into_iter()); // args
+        longs.push(0); // null
+        longs.extend(env_addrs.into_iter()); // envs
+        longs.push(0); // null
+        longs.push(0); // AT_NULL
+        longs.push(0); // AT_NULL
+
+        sp = sp - longs.len() * size_of::<u32>();
+        sp = VAddr::from(usize::from(sp) & !0xf); // Align to 16 bytes
+
+        mm_list.access_mut(sp, longs.len() * size_of::<u32>(), |offset, data| {
+            data.copy_from_slice(unsafe {
+                core::slice::from_raw_parts(
+                    longs.as_ptr().byte_add(offset) as *const u8,
+                    data.len(),
+                )
+            })
+        })?;
+
+        Ok((VAddr(self.entry as usize), sp, mm_list))
     }
 }
 
-fn push_string(sp: &mut u32, string: CString) -> u32 {
-    let data = string.as_bytes_with_nul();
-    let new_sp = (*sp - data.len() as u32) & !0x3; // Align to 4 bytes
-
-    CheckedUserPointer::new(new_sp as *const u8, data.len())
-        .unwrap()
-        .write(data.as_ptr() as _, data.len())
-        .unwrap();
+fn push_strings(mm_list: &MMList, sp: &mut VAddr, strings: Vec<CString>) -> KResult<Vec<u32>> {
+    let mut addrs = vec![];
+    for string in strings {
+        let len = string.as_bytes_with_nul().len();
+        *sp = *sp - len;
+        mm_list.access_mut(*sp, len, |offset, data| {
+            data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()])
+        })?;
+        addrs.push(usize::from(*sp) as u32);
+    }
 
-    *sp = new_sp;
-    new_sp
+    Ok(addrs)
 }

+ 35 - 180
src/fs/fat32.rs

@@ -5,16 +5,17 @@ use alloc::{
     sync::{Arc, Weak},
     vec::Vec,
 };
-use bindings::{EINVAL, EIO};
+use bindings::EIO;
 
-use itertools::Itertools;
+use dir::Dirs as _;
+use file::ClusterRead;
 
 use crate::{
-    io::{Buffer, RawBuffer, UninitBuffer},
+    io::{Buffer, ByteBuffer, UninitBuffer},
     kernel::{
         block::{make_device, BlockDevice, BlockDeviceRequest},
         constants::{S_IFDIR, S_IFREG},
-        mem::{paging::Page, phys::PhysPtr},
+        mem::paging::Page,
         vfs::{
             dentry::Dentry,
             inode::{define_struct_inode, Ino, Inode, InodeData},
@@ -27,82 +28,10 @@ use crate::{
     KResult,
 };
 
-type ClusterNo = u32;
-
-const ATTR_RO: u8 = 0x01;
-const ATTR_HIDDEN: u8 = 0x02;
-const ATTR_SYSTEM: u8 = 0x04;
-const ATTR_VOLUME_ID: u8 = 0x08;
-const ATTR_DIRECTORY: u8 = 0x10;
-const ATTR_ARCHIVE: u8 = 0x20;
-
-const RESERVED_FILENAME_LOWERCASE: u8 = 0x08;
-
-#[repr(C, packed)]
-struct FatDirectoryEntry {
-    name: [u8; 8],
-    extension: [u8; 3],
-    attr: u8,
-    reserved: u8,
-    create_time_tenth: u8,
-    create_time: u16,
-    create_date: u16,
-    access_date: u16,
-    cluster_high: u16,
-    modify_time: u16,
-    modify_date: u16,
-    cluster_low: u16,
-    size: u32,
-}
-
-impl FatDirectoryEntry {
-    pub fn filename(&self) -> Arc<[u8]> {
-        let fnpos = self.name.iter().position(|&c| c == ' ' as u8).unwrap_or(8);
-        let mut name = self.name[..fnpos].to_vec();
-
-        let extpos = self
-            .extension
-            .iter()
-            .position(|&c| c == ' ' as u8)
-            .unwrap_or(3);
-
-        if extpos != 0 {
-            name.push('.' as u8);
-            name.extend_from_slice(&self.extension[..extpos]);
-        }
-
-        if self.reserved & RESERVED_FILENAME_LOWERCASE != 0 {
-            name.make_ascii_lowercase();
-        }
-
-        name.into()
-    }
-
-    pub fn ino(&self) -> Ino {
-        let cluster_high = (self.cluster_high as u32) << 16;
-        (self.cluster_low as u32 | cluster_high) as Ino
-    }
-
-    fn is_volume_id(&self) -> bool {
-        self.attr & ATTR_VOLUME_ID != 0
-    }
-
-    fn is_free(&self) -> bool {
-        self.name[0] == 0x00
-    }
-
-    fn is_deleted(&self) -> bool {
-        self.name[0] == 0xE5
-    }
-
-    fn is_invalid(&self) -> bool {
-        self.is_volume_id() || self.is_free() || self.is_deleted()
-    }
+mod dir;
+mod file;
 
-    fn is_directory(&self) -> bool {
-        self.attr & ATTR_DIRECTORY != 0
-    }
-}
+type ClusterNo = u32;
 
 #[derive(Clone, Copy)]
 #[repr(C, packed)]
@@ -230,17 +159,13 @@ impl FatFs {
             0,
         );
 
-        let mut buffer = RawBuffer::new_from_slice(fat.as_mut_slice());
+        let mut buffer = ByteBuffer::from(fat.as_mut_slice());
 
         fatfs
             .device
             .read_some(info.reserved_sectors as usize * 512, &mut buffer)?
             .ok_or(EIO)?;
 
-        if !buffer.filled() {
-            return Err(EIO);
-        }
-
         info.volume_label
             .iter()
             .take_while(|&&c| c != ' ' as u8)
@@ -265,63 +190,6 @@ impl<'fat> ClusterIterator<'fat> {
     fn new(fat: &'fat [ClusterNo], start: ClusterNo) -> Self {
         Self { fat, cur: start }
     }
-
-    fn read<'closure, 'vfs>(
-        self,
-        vfs: &'vfs FatFs,
-        offset: usize,
-    ) -> impl Iterator<Item = KResult<&'closure [u8]>> + 'closure
-    where
-        'fat: 'closure,
-        'vfs: 'closure,
-    {
-        let cluster_size = vfs.sectors_per_cluster as usize * 512;
-
-        let skip_count = offset / cluster_size;
-        let mut inner_offset = offset % cluster_size;
-
-        let page_buffer = Page::alloc_one();
-
-        self.skip(skip_count).map(move |cluster| {
-            vfs.read_cluster(cluster, &page_buffer)?;
-
-            let data = page_buffer
-                .as_cached()
-                .as_slice::<u8>(page_buffer.len())
-                .split_at(inner_offset)
-                .1;
-            inner_offset = 0;
-
-            Ok(data)
-        })
-    }
-
-    fn dirs<'closure, 'vfs>(
-        self,
-        vfs: &'vfs FatFs,
-        offset: usize,
-    ) -> impl Iterator<Item = KResult<&'closure FatDirectoryEntry>> + 'closure
-    where
-        'fat: 'closure,
-        'vfs: 'closure,
-    {
-        const ENTRY_SIZE: usize = core::mem::size_of::<FatDirectoryEntry>();
-        self.read(vfs, offset)
-            .map(|result| {
-                let data = result?;
-                if data.len() % ENTRY_SIZE != 0 {
-                    return Err(EINVAL);
-                }
-
-                Ok(unsafe {
-                    core::slice::from_raw_parts(
-                        data.as_ptr() as *const FatDirectoryEntry,
-                        data.len() / ENTRY_SIZE,
-                    )
-                })
-            })
-            .flatten_ok()
-    }
 }
 
 impl<'fat> Iterator for ClusterIterator<'fat> {
@@ -340,6 +208,7 @@ impl<'fat> Iterator for ClusterIterator<'fat> {
     }
 }
 
+#[allow(dead_code)]
 #[derive(Clone)]
 enum FatInode {
     File(Arc<FileInode>),
@@ -380,6 +249,10 @@ impl Inode for FileInode {
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
         let fat = vfs.fat.lock_shared();
 
+        if self.size.load(Ordering::Relaxed) as usize == 0 {
+            return Ok(0);
+        }
+
         let iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).read(vfs, offset);
 
         for data in iter {
@@ -417,34 +290,25 @@ impl Inode for DirInode {
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
         let fat = vfs.fat.lock_shared();
 
-        let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).dirs(vfs, 0);
-
-        let entry = entries.find_map(|entry| {
-            if entry.is_err() {
-                return Some(entry);
-            }
-
-            let entry = entry.unwrap();
+        let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo)
+            .read(vfs, 0)
+            .dirs();
 
-            if !entry.is_invalid() && entry.filename().eq(dentry.name()) {
-                Some(Ok(entry))
-            } else {
-                None
-            }
+        let entry = entries.find(|entry| {
+            entry
+                .as_ref()
+                .map(|entry| &entry.filename == dentry.name())
+                .unwrap_or(true)
         });
 
         match entry {
             None => Ok(None),
             Some(Err(err)) => Err(err),
-            Some(Ok(entry)) => {
-                let ino = entry.ino();
-
-                Ok(Some(vfs.get_or_alloc_inode(
-                    ino,
-                    entry.is_directory(),
-                    entry.size,
-                )))
-            }
+            Some(Ok(entry)) => Ok(Some(vfs.get_or_alloc_inode(
+                entry.cluster as Ino,
+                entry.is_directory,
+                entry.size,
+            ))),
         }
     }
 
@@ -457,32 +321,23 @@ impl Inode for DirInode {
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
         let fat = vfs.fat.lock_shared();
 
-        const ENTRY_SIZE: usize = core::mem::size_of::<FatDirectoryEntry>();
-        let cluster_iter =
-            ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).dirs(vfs, offset);
+        let cluster_iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo)
+            .read(vfs, offset)
+            .dirs();
 
-        let mut nread = 0;
+        let mut nread = 0usize;
         for entry in cluster_iter {
             let entry = entry?;
 
-            if entry.is_invalid() {
-                nread += 1;
-                continue;
-            }
-
-            let ino = entry.ino();
-            let name = entry.filename();
-
-            vfs.get_or_alloc_inode(ino, entry.is_directory(), entry.size);
-
-            if callback(name.as_ref(), ino)?.is_break() {
+            vfs.get_or_alloc_inode(entry.cluster as Ino, entry.is_directory, entry.size);
+            if callback(&entry.filename, entry.cluster as Ino)?.is_break() {
                 break;
             }
 
-            nread += 1;
+            nread += entry.entry_offset as usize;
         }
 
-        Ok(nread * ENTRY_SIZE)
+        Ok(nread)
     }
 }
 

+ 240 - 0
src/fs/fat32/dir.rs

@@ -0,0 +1,240 @@
+use crate::prelude::*;
+
+use alloc::{string::String, sync::Arc};
+use itertools::Itertools;
+
+use super::{bindings::EINVAL, file::ClusterReadIterator};
+
+#[repr(C, packed)]
+pub(super) struct RawDirEntry {
+    name: [u8; 8],
+    extension: [u8; 3],
+    attr: u8,
+    reserved: u8,
+    create_time_tenth: u8,
+    create_time: u16,
+    create_date: u16,
+    access_date: u16,
+    cluster_high: u16,
+    modify_time: u16,
+    modify_date: u16,
+    cluster_low: u16,
+    size: u32,
+}
+
+pub(super) struct FatDirectoryEntry {
+    pub filename: Arc<[u8]>,
+    pub cluster: u32,
+    pub size: u32,
+    pub entry_offset: u32,
+    pub is_directory: bool,
+    // TODO:
+    // create_time: u32,
+    // modify_time: u32,
+}
+
+impl RawDirEntry {
+    const ATTR_RO: u8 = 0x01;
+    const ATTR_HIDDEN: u8 = 0x02;
+    const ATTR_SYSTEM: u8 = 0x04;
+    const ATTR_VOLUME_ID: u8 = 0x08;
+    const ATTR_DIRECTORY: u8 = 0x10;
+    #[allow(dead_code)]
+    const ATTR_ARCHIVE: u8 = 0x20;
+
+    const RESERVED_FILENAME_LOWERCASE: u8 = 0x08;
+
+    fn filename(&self) -> &[u8] {
+        self.name.trim_ascii_end()
+    }
+
+    fn extension(&self) -> &[u8] {
+        self.extension.trim_ascii_end()
+    }
+
+    fn is_filename_lowercase(&self) -> bool {
+        self.reserved & Self::RESERVED_FILENAME_LOWERCASE != 0
+    }
+
+    fn is_long_filename(&self) -> bool {
+        self.attr == (Self::ATTR_RO | Self::ATTR_HIDDEN | Self::ATTR_SYSTEM | Self::ATTR_VOLUME_ID)
+    }
+
+    fn is_volume_id(&self) -> bool {
+        self.attr & Self::ATTR_VOLUME_ID != 0
+    }
+
+    fn is_free(&self) -> bool {
+        self.name[0] == 0x00
+    }
+
+    fn is_deleted(&self) -> bool {
+        self.name[0] == 0xE5
+    }
+
+    fn is_invalid(&self) -> bool {
+        self.is_volume_id() || self.is_free() || self.is_deleted()
+    }
+
+    fn is_directory(&self) -> bool {
+        self.attr & Self::ATTR_DIRECTORY != 0
+    }
+
+    fn long_filename(&self) -> Option<[u16; 13]> {
+        if !self.is_long_filename() {
+            return None;
+        }
+
+        let mut name = [0; 13];
+        name[0] = u16::from_le_bytes([self.name[1], self.name[2]]);
+        name[1] = u16::from_le_bytes([self.name[3], self.name[4]]);
+        name[2] = u16::from_le_bytes([self.name[5], self.name[6]]);
+        name[3] = u16::from_le_bytes([self.name[7], self.extension[0]]);
+        name[4] = u16::from_le_bytes([self.extension[1], self.extension[2]]);
+        name[5] = self.create_time;
+        name[6] = self.create_date;
+        name[7] = self.access_date;
+        name[8] = self.cluster_high;
+        name[9] = self.modify_time;
+        name[10] = self.modify_date;
+        name[11] = self.size as u16;
+        name[12] = (self.size >> 16) as u16;
+
+        Some(name)
+    }
+}
+
+impl<'data, I> RawDirs<'data> for I where I: ClusterReadIterator<'data> {}
+trait RawDirs<'data>: ClusterReadIterator<'data> {
+    fn raw_dirs(self) -> impl Iterator<Item = KResult<&'data RawDirEntry>> + 'data
+    where
+        Self: Sized,
+    {
+        const ENTRY_SIZE: usize = size_of::<RawDirEntry>();
+
+        self.map(|result| {
+            let data = result?;
+            if data.len() % ENTRY_SIZE != 0 {
+                return Err(EINVAL);
+            }
+
+            Ok(unsafe {
+                core::slice::from_raw_parts(
+                    data.as_ptr() as *const RawDirEntry,
+                    data.len() / ENTRY_SIZE,
+                )
+            })
+        })
+        .flatten_ok()
+    }
+}
+
+pub(super) trait Dirs<'data>: ClusterReadIterator<'data> {
+    fn dirs(self) -> impl Iterator<Item = KResult<FatDirectoryEntry>> + 'data
+    where
+        Self: Sized;
+}
+
+impl<'data, I> Dirs<'data> for I
+where
+    I: ClusterReadIterator<'data>,
+{
+    fn dirs(self) -> impl Iterator<Item = KResult<FatDirectoryEntry>> + 'data
+    where
+        Self: Sized,
+    {
+        self.raw_dirs().real_dirs()
+    }
+}
+
+trait RealDirs<'data>: Iterator<Item = KResult<&'data RawDirEntry>> + 'data {
+    fn real_dirs(self) -> DirsIter<'data, Self>
+    where
+        Self: Sized;
+}
+
+impl<'data, I> RealDirs<'data> for I
+where
+    I: Iterator<Item = KResult<&'data RawDirEntry>> + 'data,
+{
+    fn real_dirs(self) -> DirsIter<'data, Self>
+    where
+        Self: Sized,
+    {
+        DirsIter { iter: self }
+    }
+}
+
+pub(super) struct DirsIter<'data, I>
+where
+    I: Iterator<Item = KResult<&'data RawDirEntry>> + 'data,
+{
+    iter: I,
+}
+
+impl<'data, I> Iterator for DirsIter<'data, I>
+where
+    I: Iterator<Item = KResult<&'data RawDirEntry>> + 'data,
+{
+    type Item = KResult<FatDirectoryEntry>;
+    /// Yields the next real directory entry, folding any preceding long-filename entries into its name.
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut filename = String::new();
+        let mut entry_offset = 0;
+        let entry = loop {
+            let entry = match self.iter.next()? {
+                Ok(entry) => entry,
+                Err(err) => return Some(Err(err)),
+            };
+            entry_offset += 1;
+            // Long-filename entries carry ATTR_VOLUME_ID, so they take the `is_invalid` path below.
+            let long_filename = entry.long_filename();
+            if entry.is_invalid() {
+                if let Some(long_filename) = long_filename {
+                    let long_filename = long_filename
+                        .iter()
+                        .position(|&ch| ch == 0)
+                        .map(|pos| &long_filename[..pos])
+                        .unwrap_or(&long_filename);
+                    // Chars are appended in reverse; the complete name is reversed back by `char` below.
+                    filename.extend(
+                        long_filename
+                            .into_iter()
+                            .map(|&ch| char::from_u32(ch as u32).unwrap_or('?'))
+                            .rev(),
+                    );
+                }
+                continue;
+            }
+            break entry;
+        };
+
+        let filename: Arc<[u8]> = if filename.is_empty() {
+            let mut filename = entry.filename().to_vec();
+            let extension = entry.extension();
+            if !extension.is_empty() {
+                filename.push(b'.');
+                filename.extend_from_slice(extension);
+            }
+
+            if entry.is_filename_lowercase() {
+                filename.make_ascii_lowercase();
+            }
+
+            filename.into()
+        } else {
+            // Reverse by `char`, not by byte: a byte-wise reverse scrambles multi-byte UTF-8 sequences.
+            let name: String = filename.chars().rev().collect();
+
+            name.into_bytes().into()
+        };
+
+        Some(Ok(FatDirectoryEntry {
+            size: entry.size,
+            entry_offset,
+            filename,
+            cluster: entry.cluster_low as u32 | ((entry.cluster_high as u32) << 16),
+            is_directory: entry.is_directory(),
+        }))
+    }
+}

+ 34 - 0
src/fs/fat32/file.rs

@@ -0,0 +1,34 @@
+use crate::{kernel::mem::Page, KResult};
+
+use super::{ClusterIterator, FatFs};
+
+/// Shorthand for an iterator that yields `KResult<&'data [u8]>` items and
+/// itself outlives `'data`.
+pub trait ClusterReadIterator<'data>: Iterator<Item = KResult<&'data [u8]>> + 'data {}
+// Blanket implementation: every iterator with the matching item type and
+// lifetime bound is a `ClusterReadIterator`.
+impl<'a, I> ClusterReadIterator<'a> for I where I: Iterator<Item = KResult<&'a [u8]>> + 'a {}
+
+/// Conversion of a value into an iterator of byte slices read through a
+/// [`FatFs`].
+pub(super) trait ClusterRead<'data> {
+    /// Consumes `self` and returns a [`ClusterReadIterator`] over the data.
+    ///
+    /// `vfs` must outlive the yielded slices (`'vfs: 'data`). In the
+    /// `ClusterIterator` implementation `offset` is a byte offset from the
+    /// first cluster.
+    fn read<'vfs>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data>
+    where
+        Self: Sized,
+        'vfs: 'data;
+}
+
+impl<'data, 'fat: 'data> ClusterRead<'data> for ClusterIterator<'fat> {
+    /// Reads the clusters produced by `self`, starting `offset` bytes in.
+    ///
+    /// Whole clusters covered by `offset` are skipped; the first yielded
+    /// slice starts at the remaining in-cluster offset and every later slice
+    /// starts at the beginning of its cluster. Each slice ends at the
+    /// cluster boundary.
+    ///
+    /// All slices alias one shared buffer page that is overwritten by the
+    /// next read, so an item must be consumed before the iterator advances.
+    ///
+    /// # Panics
+    /// Panics if the cluster size exceeds `0x1000` bytes.
+    fn read<'vfs: 'data>(self, vfs: &'vfs FatFs, offset: usize) -> impl ClusterReadIterator<'data> {
+        const SECTOR_SIZE: usize = 512;
+
+        let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE;
+        assert!(cluster_size <= 0x1000, "Cluster size is too large");
+
+        let skip_clusters = offset / cluster_size;
+        let mut inner_offset = offset % cluster_size;
+
+        let buffer_page = Page::alloc_one();
+
+        self.skip(skip_clusters).map(move |cluster| {
+            vfs.read_cluster(cluster, &buffer_page)?;
+            // Stop at the cluster boundary: when a cluster is smaller than
+            // the buffer page, the bytes behind it are not part of the file.
+            // NOTE(review): assumes `as_slice()` spans at least `cluster_size`
+            // bytes (a full 0x1000-byte page) - confirm.
+            let data = &buffer_page.as_slice()[inner_offset..cluster_size];
+            // Only the first cluster is entered part-way.
+            inner_offset = 0;
+            Ok(data)
+        })
+    }
+}

+ 4 - 1
src/fs/procfs.rs

@@ -30,6 +30,7 @@ fn split_len_offset(data: &[u8], len: usize, offset: usize) -> Option<&[u8]> {
     real_data.split_at_checked(offset).map(|(_, data)| data)
 }
 
+#[allow(dead_code)]
 pub trait ProcFsFile: Send + Sync {
     fn can_read(&self) -> bool {
         false
@@ -116,7 +117,7 @@ impl Inode for FileInode {
 }
 
 define_struct_inode! {
-    struct DirInode {
+    pub struct DirInode {
         entries: Locked<Vec<(Arc<[u8]>, ProcFsNode)>, ()>,
     }
 }
@@ -196,6 +197,7 @@ lazy_static! {
 
 struct ProcFsMountCreator;
 
+#[allow(dead_code)]
 impl ProcFsMountCreator {
     pub fn get() -> Arc<ProcFs> {
         GLOBAL_PROCFS.clone()
@@ -247,6 +249,7 @@ pub fn creat(
     Ok(ProcFsNode::File(inode))
 }
 
+#[allow(dead_code)]
 pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult<ProcFsNode> {
     let parent = match parent {
         ProcFsNode::File(_) => return Err(ENOTDIR),

+ 26 - 0
src/fs/tmpfs.rs

@@ -203,6 +203,17 @@ impl Inode for DirectoryInode {
 
         Ok(())
     }
+
+    /// Replaces the low nine permission bits (`0o777`) of the stored mode
+    /// with those of `mode`; every other mode bit is kept as it was.
+    ///
+    /// Fails only if `acquire` on the owning filesystem fails.
+    fn chmod(&self, mode: Mode) -> KResult<()> {
+        let _vfs = acquire(&self.vfs)?;
+        let _guard = self.rwsem.lock();
+
+        let permission_mask = 0o777;
+
+        // The load/store pair is serialized by the `rwsem` guard held above.
+        let preserved = self.mode.load(Ordering::Relaxed) & !permission_mask;
+        let updated = preserved | (mode & permission_mask);
+        self.mode.store(updated, Ordering::Relaxed);
+
+        Ok(())
+    }
 }
 
 define_struct_inode! {
@@ -229,6 +240,10 @@ impl Inode for SymlinkInode {
             .fill(self.target.as_ref())
             .map(|result| result.allow_partial())
     }
+
+    /// Accepts any `mode` and reports success without changing anything.
+    fn chmod(&self, _: Mode) -> KResult<()> {
+        Ok(())
+    }
 }
 
 define_struct_inode! {
@@ -298,6 +313,17 @@ impl Inode for FileInode {
 
         Ok(())
     }
+
+    /// Overwrites the permission bits (`0o777`) of the stored mode with the
+    /// ones given in `mode`, leaving all remaining mode bits untouched.
+    ///
+    /// Returns an error only when `acquire` on the owning filesystem fails.
+    fn chmod(&self, mode: Mode) -> KResult<()> {
+        let _vfs = acquire(&self.vfs)?;
+        let _guard = self.rwsem.lock();
+
+        let permission_mask = 0o777;
+
+        // `rwsem` is held, so this read-modify-write cannot interleave with
+        // another holder of the same lock.
+        let current = self.mode.load(Ordering::Relaxed);
+        let requested = mode & permission_mask;
+        self.mode
+            .store((current & !permission_mask) | requested, Ordering::Relaxed);
+
+        Ok(())
+    }
 }
 
 impl_any!(TmpFs);

+ 1 - 0
src/intrusive_list.rs

@@ -5,6 +5,7 @@ pub struct Link {
     next: Option<NonNull<Link>>,
 }
 
+#[allow(dead_code)]
 impl Link {
     pub const fn new() -> Self {
         Self {

+ 22 - 80
src/io.rs

@@ -82,7 +82,7 @@ where
 
 pub struct UninitBuffer<'lt, T: Copy + Sized> {
     data: Box<MaybeUninit<T>>,
-    buffer: RawBuffer<'lt>,
+    buffer: ByteBuffer<'lt>,
 }
 
 impl<'lt, T: Copy + Sized> UninitBuffer<'lt, T> {
@@ -92,25 +92,25 @@ impl<'lt, T: Copy + Sized> UninitBuffer<'lt, T> {
 
         Self {
             data,
-            buffer: RawBuffer::new_from_slice(unsafe {
+            buffer: ByteBuffer::from(unsafe {
                 core::slice::from_raw_parts_mut(ptr as *mut u8, core::mem::size_of::<T>())
             }),
         }
     }
 
     pub fn assume_filled_ref(&self) -> KResult<&T> {
-        if !self.buffer.filled() {
+        if self.buffer.available() != 0 {
             Err(EFAULT)
         } else {
             Ok(unsafe { self.data.assume_init_ref() })
         }
     }
 
-    pub fn assume_init(self) -> Option<T> {
-        if self.buffer.filled() {
-            Some(unsafe { *self.data.assume_init() })
+    pub fn assume_init(self) -> KResult<T> {
+        if self.buffer.available() != 0 {
+            Err(EFAULT)
         } else {
-            None
+            Ok(unsafe { *self.data.assume_init() })
         }
     }
 }
@@ -129,78 +129,6 @@ impl<'lt, T: Copy + Sized> Buffer for UninitBuffer<'lt, T> {
     }
 }
 
-pub struct RawBuffer<'lt> {
-    buf: *mut u8,
-    tot: usize,
-    cur: usize,
-    _phantom: core::marker::PhantomData<&'lt mut u8>,
-}
-
-impl<'lt> RawBuffer<'lt> {
-    pub fn new_from_slice<T: Copy + Sized>(buf: &'lt mut [T]) -> Self {
-        Self {
-            buf: buf.as_mut_ptr() as *mut u8,
-            tot: core::mem::size_of::<T>() * buf.len(),
-            cur: 0,
-            _phantom: core::marker::PhantomData,
-        }
-    }
-
-    pub fn count(&self) -> usize {
-        self.cur
-    }
-
-    pub fn total(&self) -> usize {
-        self.tot
-    }
-
-    pub fn available(&self) -> usize {
-        self.total() - self.count()
-    }
-
-    pub fn filled(&self) -> bool {
-        self.count() == self.total()
-    }
-
-    pub fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
-        match self.available() {
-            n if n == 0 => Ok(FillResult::Full),
-            n if n < data.len() => {
-                unsafe {
-                    core::ptr::copy_nonoverlapping(data.as_ptr(), self.buf.add(self.count()), n);
-                }
-                self.cur += n;
-                Ok(FillResult::Partial(n))
-            }
-            _ => {
-                unsafe {
-                    core::ptr::copy_nonoverlapping(
-                        data.as_ptr(),
-                        self.buf.add(self.count()),
-                        data.len(),
-                    );
-                }
-                self.cur += data.len();
-                Ok(FillResult::Done(data.len()))
-            }
-        }
-    }
-}
-
-impl Buffer for RawBuffer<'_> {
-    fn total(&self) -> usize {
-        RawBuffer::total(self)
-    }
-
-    fn wrote(&self) -> usize {
-        self.count()
-    }
-
-    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
-        RawBuffer::fill(self, data)
-    }
-}
-
 pub struct ByteBuffer<'lt> {
     buf: &'lt mut [u8],
     cur: usize,
@@ -220,6 +148,20 @@ impl<'lt> ByteBuffer<'lt> {
     }
 }
 
+impl<'lt, T: Copy + Sized> From<&'lt mut [T]> for ByteBuffer<'lt> {
+    /// Views the storage of `value` as a writable byte buffer of
+    /// `size_of::<T>() * value.len()` bytes, with the cursor at the start.
+    fn from(value: &'lt mut [T]) -> Self {
+        let byte_len = core::mem::size_of::<T>() * value.len();
+
+        Self {
+            // SAFETY: the pointer and length describe exactly the memory of
+            // `value`, which stays exclusively borrowed for `'lt`, and `u8`
+            // has no alignment requirement. The pointer is taken with
+            // `as_mut_ptr` so that it carries write permission; a pointer
+            // obtained from `as_ptr` must not be written through.
+            // NOTE(review): bytes written through the buffer must form valid
+            // `T` values - confirm for every `T` used by callers.
+            buf: unsafe {
+                core::slice::from_raw_parts_mut(value.as_mut_ptr() as *mut u8, byte_len)
+            },
+            cur: 0,
+        }
+    }
+}
+
 impl Buffer for ByteBuffer<'_> {
     fn total(&self) -> usize {
         self.buf.len()
@@ -227,7 +169,7 @@ impl Buffer for ByteBuffer<'_> {
 
     fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
         match self.available() {
-            n if n == 0 => Ok(FillResult::Full),
+            0 => Ok(FillResult::Full),
             n if n < data.len() => {
                 self.buf[self.cur..].copy_from_slice(&data[..n]);
                 self.cur += n;

+ 1 - 0
src/kernel.rs

@@ -16,6 +16,7 @@ pub mod smp;
 mod chardev;
 mod terminal;
 
+#[allow(unused_imports)]
 pub use chardev::{CharDevice, CharDeviceType, VirtualCharDevice};
 pub use console::Console;
 pub use terminal::{Terminal, TerminalDevice};

+ 3 - 15
src/kernel/block.rs

@@ -13,10 +13,7 @@ use bindings::{EEXIST, EINVAL, EIO, ENOENT};
 
 use lazy_static::lazy_static;
 
-use super::{
-    mem::{paging::Page, phys::PhysPtr},
-    vfs::DevId,
-};
+use super::{mem::paging::Page, vfs::DevId};
 
 pub fn make_device(major: u32, minor: u32) -> DevId {
     (major << 8) & 0xff00u32 | minor & 0xffu32
@@ -262,17 +259,8 @@ impl BlockDevice {
             self.read_raw(req)?;
 
             for page in pages.iter() {
-                let data = if first_sector_offset != 0 {
-                    let ret = page
-                        .as_cached()
-                        .as_slice(page.len())
-                        .split_at(first_sector_offset as usize)
-                        .1;
-                    first_sector_offset = 0;
-                    ret
-                } else {
-                    page.as_cached().as_slice(page.len())
-                };
+                let data = &page.as_slice()[first_sector_offset as usize..];
+                first_sector_offset = 0;
 
                 match buffer.fill(data)? {
                     FillResult::Done(n) => nfilled += n,

+ 1 - 0
src/kernel/chardev.rs

@@ -29,6 +29,7 @@ pub enum CharDeviceType {
     Virtual(Box<dyn VirtualCharDevice>),
 }
 
+#[allow(dead_code)]
 pub struct CharDevice {
     name: Arc<str>,
     device: CharDeviceType,

+ 6 - 0
src/kernel/constants.rs

@@ -21,7 +21,9 @@ pub const CLOCK_MONOTONIC: u32 = 1;
 
 pub const ENXIO: u32 = 6;
 pub const ENOEXEC: u32 = 8;
+pub const ENOSYS: u32 = 38;
 
+#[allow(dead_code)]
 pub const S_IFIFO: u32 = 0o010000;
 pub const S_IFCHR: u32 = 0o020000;
 pub const S_IFDIR: u32 = 0o040000;
@@ -29,6 +31,10 @@ pub const S_IFBLK: u32 = 0o060000;
 pub const S_IFREG: u32 = 0o100000;
 pub const S_IFLNK: u32 = 0o120000;
 
+pub const RLIMIT_STACK: u32 = 0x3;
+
+pub const AT_EMPTY_PATH: u32 = 0x1000;
+
 bitflags! {
     #[derive(Debug, Clone, Copy)]
     pub struct UserMmapFlags: u32 {

+ 2 - 2
src/kernel/mem.rs

@@ -7,9 +7,9 @@ mod mm_list;
 mod page_alloc;
 mod page_table;
 
+#[allow(unused_imports)]
 pub use address::{PAddr, VAddr, VRange, PFN, VPN};
 pub(self) use mm_area::MMArea;
-pub use mm_list::{handle_page_fault, FileMapping, MMList, Mapping, PageFaultError, Permission};
-pub(self) use page_alloc::{alloc_page, alloc_pages, create_pages, free_pages, mark_present};
+pub use mm_list::{handle_page_fault, FileMapping, MMList, Mapping, Permission};
 pub(self) use page_table::{PageTable, PTE};
 pub use paging::{Page, PageBuffer};

+ 3 - 3
src/kernel/mem/address.rs

@@ -1,7 +1,7 @@
 use core::{
     cmp::Ordering,
     fmt::{self, Debug, Formatter},
-    ops::{Add, Sub, RangeBounds},
+    ops::{Add, RangeBounds, Sub},
 };
 
 #[repr(C)]
@@ -38,7 +38,7 @@ impl From<PFN> for usize {
 
 impl From<VAddr> for usize {
     fn from(v: VAddr) -> Self {
-       v.0
+        v.0
     }
 }
 
@@ -72,7 +72,6 @@ impl From<usize> for VPN {
     }
 }
 
-
 impl From<VPN> for VAddr {
     fn from(v: VPN) -> Self {
         Self(v.0 << PAGE_SIZE_BITS)
@@ -353,6 +352,7 @@ impl VRange {
         VRange { start, end }
     }
 
+    #[allow(dead_code)]
     pub fn is_overlapped(&self, other: &Self) -> bool {
         self == other
     }

+ 93 - 1
src/kernel/mem/mm_area.rs

@@ -1,6 +1,12 @@
+use crate::prelude::*;
+
+use bindings::PA_MMAP;
+
 use core::{borrow::Borrow, cell::UnsafeCell, cmp::Ordering};
 
-use super::{Mapping, Permission, VAddr, VRange};
+use crate::bindings::root::{PA_A, PA_ANON, PA_COW, PA_P, PA_RW};
+
+use super::{Mapping, Page, PageBuffer, Permission, VAddr, VRange, PTE};
 
 #[derive(Debug)]
 pub struct MMArea {
@@ -38,6 +44,7 @@ impl MMArea {
         *self.range_borrow()
     }
 
+    #[allow(dead_code)]
     pub fn len(&self) -> usize {
         self.range_borrow().len()
     }
@@ -76,6 +83,91 @@ impl MMArea {
             }
         }
     }
+
+    /// Resolves the copy-on-write state of `pte`.
+    ///
+    /// The COW flag is cleared and the writable flag is set according to the
+    /// area's write permission. If the backing page has a reference count of
+    /// one, only the attributes are rewritten; otherwise a fresh page is
+    /// allocated (zeroed when the entry is flagged anonymous, copied from the
+    /// old page otherwise) and installed in the entry.
+    ///
+    /// # Return
+    /// Whether the whole handling process is done.
+    pub fn handle_cow(&self, pte: &mut PTE) -> bool {
+        let mut attributes = pte.attributes();
+        let mut pfn = pte.pfn();
+
+        // Cast before negating: `!FLAG as usize` parses as `(!FLAG) as usize`,
+        // which would clear the high attribute bits if the binding constants
+        // are narrower than `usize`.
+        attributes &= !(PA_COW as usize);
+        if self.permission.write {
+            attributes |= PA_RW as usize;
+        } else {
+            attributes &= !(PA_RW as usize);
+        }
+
+        let page = unsafe { Page::take_pfn(pfn, 0) };
+        if unsafe { page.load_refcount() } == 1 {
+            // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock
+            // held, there couldn't be neither other processes sharing the page, nor other
+            // threads making the page COW at the same time.
+            pte.set_attributes(attributes);
+            // The entry keeps using this page, so the handle must not be dropped.
+            core::mem::forget(page);
+            return true;
+        }
+
+        let new_page = Page::alloc_one();
+        if attributes & PA_ANON as usize != 0 {
+            new_page.zero();
+        } else {
+            new_page.as_mut_slice().copy_from_slice(page.as_slice());
+        }
+
+        attributes &= !((PA_A | PA_ANON) as usize);
+
+        pfn = new_page.into_pfn();
+        pte.set(pfn, attributes);
+
+        false
+    }
+
+
+    /// Fills the page behind `pte` from the mapped file and marks the entry
+    /// present, clearing its pending-mmap flag.
+    ///
+    /// Bytes the file read did not provide, and bytes past the mapping
+    /// length, are zeroed.
+    ///
+    /// # Arguments
+    /// * `offset`: The offset from the start of the mapping, aligned to 4KB boundary.
+    ///
+    /// # Errors
+    /// Propagates the error of the file read.
+    ///
+    /// # Panics
+    /// Panics if `offset` is not below the mapping length or if the area is
+    /// not a file mapping.
+    pub fn handle_mmap(&self, pte: &mut PTE, offset: usize) -> KResult<()> {
+        // TODO: Implement shared mapping
+        let mut attributes = pte.attributes();
+        let pfn = pte.pfn();
+
+        attributes |= PA_P as usize;
+
+        match &self.mapping {
+            Mapping::File(mapping) if offset < mapping.length => {
+                // SAFETY: Since we are here, the `pfn` must refer to a valid buddy page.
+                let page = unsafe { Page::from_pfn(pfn, 0) };
+                let nread = mapping
+                    .file
+                    .read(&mut PageBuffer::new(page.clone()), mapping.offset + offset)?;
+
+                // Short read: clear whatever the file did not fill.
+                if nread < page.len() {
+                    page.as_mut_slice()[nread..].fill(0);
+                }
+
+                // Last page of the mapping: clear the part past its length.
+                if mapping.length - offset < 0x1000 {
+                    let length_to_end = mapping.length - offset;
+                    page.as_mut_slice()[length_to_end..].fill(0);
+                }
+            }
+            Mapping::File(_) => panic!("Offset out of range"),
+            _ => panic!("Anonymous mapping should not be PA_MMAP"),
+        }
+
+        // Cast before negating: `!PA_MMAP as usize` parses as
+        // `(!PA_MMAP) as usize`, which would clear the high attribute bits if
+        // the binding constant is narrower than `usize`.
+        attributes &= !(PA_MMAP as usize);
+        pte.set_attributes(attributes);
+        Ok(())
+    }
+
+    /// Brings `pte` into a usable state: first resolves a pending
+    /// copy-on-write, then a pending file-backed fill.
+    ///
+    /// `offset` is forwarded to `handle_mmap`; its error, if any, is
+    /// returned. The flag returned by `handle_cow` is not used.
+    pub fn handle(&self, pte: &mut PTE, offset: usize) -> KResult<()> {
+        if pte.is_cow() {
+            self.handle_cow(pte);
+        }
+
+        // The mmap flag is re-read after the COW step has rewritten the entry.
+        if !pte.is_mmap() {
+            return Ok(());
+        }
+
+        self.handle_mmap(pte, offset)
+    }
 }
 
 impl Eq for MMArea {}

+ 160 - 62
src/kernel/mem/mm_list.rs

@@ -1,23 +1,25 @@
 mod page_fault;
 
-use crate::prelude::*;
+use core::sync::atomic::{AtomicUsize, Ordering};
+
+use crate::{prelude::*, sync::ArcSwap};
 
 use alloc::{collections::btree_set::BTreeSet, sync::Arc};
-use bindings::{EEXIST, EINVAL, ENOMEM};
+use bindings::{EEXIST, EFAULT, EINVAL, ENOMEM, KERNEL_PML4};
 
 use crate::kernel::vfs::dentry::Dentry;
 
-use super::{MMArea, PageTable, VAddr, VRange};
+use super::{MMArea, Page, PageTable, VAddr, VRange};
 
-pub use page_fault::{handle_page_fault, PageFaultError};
+pub use page_fault::handle_page_fault;
 
 #[derive(Debug, Clone)]
 pub struct FileMapping {
-    file: Arc<Dentry>,
+    pub file: Arc<Dentry>,
     /// Offset in the file, aligned to 4KB boundary.
-    offset: usize,
+    pub offset: usize,
     /// Length of the mapping. Exceeding part will be zeroed.
-    length: usize,
+    pub length: usize,
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -35,6 +37,7 @@ pub enum Mapping {
 #[derive(Debug)]
 struct MMListInner {
     areas: BTreeSet<MMArea>,
+    page_table: PageTable,
     break_start: Option<VRange>,
     break_pos: Option<VAddr>,
 }
@@ -43,9 +46,9 @@ struct MMListInner {
 pub struct MMList {
     /// # Safety
     /// This field might be used in IRQ context, so it should be locked with `lock_irq()`.
-    inner: Mutex<MMListInner>,
-    /// Do not modify entries in the page table without acquiring the `inner` lock.
-    page_table: PageTable,
+    inner: ArcSwap<Mutex<MMListInner>>,
+    /// Only used in kernel space to switch page tables on context switch.
+    root_page_table: AtomicUsize,
 }
 
 impl FileMapping {
@@ -110,7 +113,7 @@ impl MMListInner {
         }
     }
 
-    fn unmap(&mut self, page_table: &PageTable, start: VAddr, len: usize) -> KResult<()> {
+    fn unmap(&mut self, start: VAddr, len: usize) -> KResult<()> {
         assert_eq!(start.floor(), start);
         let end = (start + len).ceil();
         let range = VRange::new(start, end);
@@ -128,7 +131,7 @@ impl MMListInner {
             }
             if area.range() == range.start().into() {
                 let (left, right) = area.clone().split(range.start());
-                page_table.unmap(&right.unwrap());
+                self.page_table.unmap(&right.unwrap());
 
                 if let Some(left) = left {
                     assert!(
@@ -138,7 +141,7 @@ impl MMListInner {
                 }
             } else if area.range() == range.end().into() {
                 let (left, right) = area.clone().split(range.end());
-                page_table.unmap(&left.unwrap());
+                self.page_table.unmap(&left.unwrap());
 
                 assert!(
                     back_remaining
@@ -147,7 +150,7 @@ impl MMListInner {
                     "There should be only one `back`."
                 );
             } else {
-                page_table.unmap(area);
+                self.page_table.unmap(area);
             }
 
             false
@@ -165,7 +168,6 @@ impl MMListInner {
 
     fn mmap(
         &mut self,
-        page_table: &PageTable,
         at: VAddr,
         len: usize,
         mapping: Mapping,
@@ -181,8 +183,8 @@ impl MMListInner {
         }
 
         match &mapping {
-            Mapping::Anonymous => page_table.set_anonymous(range, permission),
-            Mapping::File(_) => page_table.set_mmapped(range, permission),
+            Mapping::Anonymous => self.page_table.set_anonymous(range, permission),
+            Mapping::File(_) => self.page_table.set_mmapped(range, permission),
         }
 
         self.areas.insert(MMArea::new(range, mapping, permission));
@@ -191,36 +193,41 @@ impl MMListInner {
 }
 
 impl MMList {
-    pub fn new() -> Arc<Self> {
-        Arc::new(Self {
-            inner: Mutex::new(MMListInner {
+    pub fn new() -> Self {
+        let page_table = PageTable::new();
+        Self {
+            root_page_table: AtomicUsize::from(page_table.root_page_table()),
+            inner: ArcSwap::new(Mutex::new(MMListInner {
                 areas: BTreeSet::new(),
+                page_table,
                 break_start: None,
                 break_pos: None,
-            }),
-            page_table: PageTable::new(),
-        })
+            })),
+        }
     }
 
-    pub fn new_cloned(&self) -> Arc<Self> {
-        let inner = self.inner.lock_irq();
+    pub fn new_cloned(&self) -> Self {
+        let inner = self.inner.borrow();
+        let inner = inner.lock_irq();
 
-        let list = Arc::new(Self {
-            inner: Mutex::new(MMListInner {
+        let page_table = PageTable::new();
+        let list = Self {
+            root_page_table: AtomicUsize::from(page_table.root_page_table()),
+            inner: ArcSwap::new(Mutex::new(MMListInner {
                 areas: inner.areas.clone(),
+                page_table,
                 break_start: inner.break_start,
                 break_pos: inner.break_pos,
-            }),
-            page_table: PageTable::new(),
-        });
+            })),
+        };
 
-        // SAFETY: `self.inner` already locked with IRQ disabled.
         {
-            let list_inner = list.inner.lock();
+            let list_inner = list.inner.borrow();
+            let list_inner = list_inner.lock();
 
             for area in list_inner.areas.iter() {
-                let new_iter = list.page_table.iter_user(area.range()).unwrap();
-                let old_iter = self.page_table.iter_user(area.range()).unwrap();
+                let new_iter = list_inner.page_table.iter_user(area.range()).unwrap();
+                let old_iter = inner.page_table.iter_user(area.range()).unwrap();
 
                 for (new, old) in new_iter.zip(old_iter) {
                     new.setup_cow(old);
@@ -229,29 +236,54 @@ impl MMList {
         }
 
         // We set some pages as COW, so we need to invalidate TLB.
-        self.page_table.lazy_invalidate_tlb_all();
+        inner.page_table.lazy_invalidate_tlb_all();
 
         list
     }
 
-    /// No need to do invalidation manually, `PageTable` already does it.
-    pub fn clear_user(&self) {
-        let mut inner = self.inner.lock_irq();
-        inner.areas.retain(|area| {
-            self.page_table.unmap(area);
-            false
-        });
-        inner.break_start = None;
-        inner.break_pos = None;
+    pub fn switch_page_table(&self) {
+        let root_page_table = self.root_page_table.load(Ordering::Relaxed);
+        assert_ne!(root_page_table, 0);
+        arch::set_root_page_table(root_page_table);
     }
 
-    pub fn switch_page_table(&self) {
-        self.page_table.switch();
+    /// Makes this list take over the contents of `new`: the old contents are
+    /// released, then the inner state and the root page table value of `new`
+    /// are installed.
+    ///
+    /// If the CPU is currently running on this list's root page table, the
+    /// kernel page table is used during the exchange and the new root is
+    /// activated afterwards.
+    pub fn replace(&self, new: Self) {
+        // Switch to kernel page table in case we are using the page table to be swapped and released.
+        let mut switched = false;
+        if arch::get_root_page_table() == self.root_page_table.load(Ordering::Relaxed) {
+            arch::set_root_page_table(KERNEL_PML4 as usize);
+            switched = true;
+        }
+
+        unsafe {
+            // SAFETY: Even if we're using the page table, we've switched to kernel page table.
+            // So it's safe to release the old memory list.
+            self.release();
+        }
+
+        // SAFETY: `self.inner` should be `None` after releasing.
+        self.inner.swap(Some(new.inner.borrow().clone()));
+        // Must be stored before `switch_page_table` below, which reads it back.
+        self.root_page_table.store(
+            new.root_page_table.load(Ordering::Relaxed),
+            Ordering::Relaxed,
+        );
+
+        // Return to user mappings only if we left them at the top.
+        if switched {
+            self.switch_page_table();
+        }
+    }
+
+    /// Drops this list's reference to its inner state and resets the stored
+    /// root page table value to `0`.
+    ///
+    /// # Safety
+    /// This function is unsafe because the caller should make sure that the `inner` is not currently used.
+    pub unsafe fn release(&self) {
+        // TODO: Check whether we should wake someone up if they've been put to sleep when calling `vfork`.
+        self.inner.swap(None);
+        // `0` is the value `switch_page_table` rejects with its `assert_ne!`.
+        self.root_page_table.store(0, Ordering::Relaxed);
     }
 
     /// No need to do invalidation manually, `PageTable` already does it.
     pub fn unmap(&self, start: VAddr, len: usize) -> KResult<()> {
-        self.inner.lock_irq().unmap(&self.page_table, start, len)
+        self.inner.borrow().lock_irq().unmap(start, len)
     }
 
     pub fn mmap_hint(
@@ -261,18 +293,20 @@ impl MMList {
         mapping: Mapping,
         permission: Permission,
     ) -> KResult<VAddr> {
-        let mut inner = self.inner.lock_irq();
+        let inner = self.inner.borrow();
+        let mut inner = inner.lock_irq();
+
         if hint == VAddr::NULL {
             let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-            inner.mmap(&self.page_table, at, len, mapping, permission)?;
+            inner.mmap(at, len, mapping, permission)?;
             return Ok(at);
         }
 
-        match inner.mmap(&self.page_table, hint, len, mapping.clone(), permission) {
+        match inner.mmap(hint, len, mapping.clone(), permission) {
             Ok(()) => Ok(hint),
             Err(EEXIST) => {
                 let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-                inner.mmap(&self.page_table, at, len, mapping, permission)?;
+                inner.mmap(at, len, mapping, permission)?;
                 Ok(at)
             }
             Err(err) => Err(err),
@@ -287,13 +321,15 @@ impl MMList {
         permission: Permission,
     ) -> KResult<VAddr> {
         self.inner
+            .borrow()
             .lock_irq()
-            .mmap(&self.page_table, at, len, mapping.clone(), permission)
+            .mmap(at, len, mapping.clone(), permission)
             .map(|_| at)
     }
 
     pub fn set_break(&self, pos: Option<VAddr>) -> VAddr {
-        let mut inner = self.inner.lock_irq();
+        let inner = self.inner.borrow();
+        let mut inner = inner.lock_irq();
 
         // SAFETY: `set_break` is only called in syscalls, where program break should be valid.
         assert!(inner.break_start.is_some() && inner.break_pos.is_some());
@@ -326,7 +362,7 @@ impl MMList {
             .expect("Program break area should be valid");
 
         let len = pos - current_break;
-        self.page_table.set_anonymous(
+        inner.page_table.set_anonymous(
             VRange::from(program_break.range().end()).grow(len),
             Permission {
                 write: true,
@@ -342,19 +378,81 @@ impl MMList {
 
     /// This should be called only **once** for every thread.
     pub fn register_break(&self, start: VAddr) {
-        let mut inner = self.inner.lock_irq();
+        let inner = self.inner.borrow();
+        let mut inner = inner.lock_irq();
         assert!(inner.break_start.is_none() && inner.break_pos.is_none());
 
         inner.break_start = Some(start.into());
         inner.break_pos = Some(start);
     }
-}
 
-impl Drop for MMList {
-    fn drop(&mut self) {
-        let inner = self.inner.get_mut();
-        assert!(inner.areas.is_empty());
-        assert_eq!(inner.break_start, None);
-        assert_eq!(inner.break_pos, None);
+    /// Access the memory area with the given function.
+    ///
+    /// The range `[start, start + len)` is walked page by page. Each page is
+    /// first brought in through `MMArea::handle`, then `func` is called with
+    /// the byte offset of the slice relative to `start` and the mutable slice
+    /// of that page which lies inside the range.
+    ///
+    /// # Errors
+    /// * `EINVAL` if `start` or `start + len` is not a user address.
+    /// * `EFAULT` if part of the range is not covered by any area.
+    /// * Any error from the page table walk or from `MMArea::handle`.
+    pub fn access_mut<F>(&self, start: VAddr, len: usize, func: F) -> KResult<()>
+    where
+        F: Fn(usize, &mut [u8]),
+    {
+        // First, validate the address range.
+        let end = start + len;
+        if !start.is_user() || !end.is_user() {
+            return Err(EINVAL);
+        }
+
+        let inner = self.inner.borrow();
+        let inner = inner.lock_irq();
+
+        // Bytes of the range already handled, i.e. `current - start`.
+        let mut offset = 0;
+        let mut remaining = len;
+        let mut current = start;
+
+        while remaining > 0 {
+            let area = inner.overlapping_addr(current).ok_or(EFAULT)?;
+
+            let area_start = area.range().start();
+            let area_end = area.range().end();
+            let area_remaining = area_end - current;
+
+            // Never cross into the next area within one pass.
+            let access_len = remaining.min(area_remaining);
+            let access_end = current + access_len;
+
+            for (idx, pte) in inner
+                .page_table
+                .iter_user(VRange::new(current, access_end))?
+                .enumerate()
+            {
+                let page_start = current.floor() + idx * 0x1000;
+                let page_end = page_start + 0x1000;
+
+                area.handle(pte, page_start - area_start)?;
+
+                // Clip the page to the part that lies inside the access.
+                let start_offset = if page_start < current {
+                    current - page_start
+                } else {
+                    0
+                };
+
+                let end_offset = if page_end > access_end {
+                    access_end - page_start
+                } else {
+                    0x1000
+                };
+
+                // Distance of the slice's first byte from `start`. It must be
+                // measured from `current`, not from the page index: when
+                // `current` is not page aligned the first slice is shorter
+                // than a page, so `idx * 0x1000` would overshoot for every
+                // following page.
+                let slice_offset = offset + (page_start + start_offset - current);
+
+                unsafe {
+                    let page = Page::from_pfn(pte.pfn(), 0);
+                    func(
+                        slice_offset,
+                        &mut page.as_mut_slice()[start_offset..end_offset],
+                    );
+                }
+            }
+
+            offset += access_len;
+            remaining -= access_len;
+            current = access_end;
+        }
+
+        Ok(())
     }
 }

+ 6 - 78
src/kernel/mem/mm_list/page_fault.rs

@@ -1,9 +1,6 @@
 use arch::InterruptContext;
-use bindings::{PA_A, PA_ANON, PA_COW, PA_MMAP, PA_P, PA_RW};
 use bitflags::bitflags;
 
-use crate::kernel::mem::paging::{Page, PageBuffer};
-use crate::kernel::mem::phys::{CachedPP, PhysPtr};
 use crate::kernel::mem::{Mapping, VRange};
 use crate::kernel::task::{ProcessList, Signal, Thread};
 use crate::prelude::*;
@@ -37,7 +34,9 @@ impl MMList {
         addr: VAddr,
         error: PageFaultError,
     ) -> Result<(), Signal> {
-        let inner = self.inner.lock();
+        let inner = self.inner.borrow();
+        let inner = inner.lock();
+
         let area = match inner.areas.get(&VRange::from(addr)) {
             Some(area) => area,
             None => {
@@ -61,7 +60,7 @@ impl MMList {
             }
         }
 
-        let pte = self
+        let pte = inner
             .page_table
             .iter_user(VRange::new(addr.floor(), addr.floor() + 0x1000))
             .unwrap()
@@ -74,79 +73,8 @@ impl MMList {
             return Ok(());
         }
 
-        let mut pfn = pte.pfn();
-        let mut attributes = pte.attributes();
-
-        if attributes & PA_COW as usize != 0 {
-            attributes &= !PA_COW as usize;
-            if area.permission.write {
-                attributes |= PA_RW as usize;
-            } else {
-                attributes &= !PA_RW as usize;
-            }
-
-            let page = unsafe { Page::take_pfn(pfn, 0) };
-            if unsafe { page.load_refcount() } == 1 {
-                // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock
-                // held, there couldn't be neither other processes sharing the page, nor other
-                // threads making the page COW at the same time.
-                pte.set_attributes(attributes);
-                core::mem::forget(page);
-                return Ok(());
-            }
-
-            let new_page = Page::alloc_one();
-            if attributes & PA_ANON as usize != 0 {
-                new_page.zero();
-            } else {
-                new_page
-                    .as_cached()
-                    .as_mut_slice::<u8>(0x1000)
-                    .copy_from_slice(CachedPP::new(pfn).as_slice(0x1000));
-            }
-
-            attributes &= !(PA_A | PA_ANON) as usize;
-
-            pfn = new_page.into_pfn();
-            pte.set(pfn, attributes);
-        }
-
-        // TODO: shared mapping
-        if attributes & PA_MMAP as usize != 0 {
-            attributes |= PA_P as usize;
-
-            if let Mapping::File(mapping) = &area.mapping {
-                let load_offset = addr.floor() - area.range().start();
-                if load_offset < mapping.length {
-                    // SAFETY: Since we are here, the `pfn` must refer to a valid buddy page.
-                    let page = unsafe { Page::from_pfn(pfn, 0) };
-                    let nread = mapping
-                        .file
-                        .read(
-                            &mut PageBuffer::new(page.clone()),
-                            mapping.offset + load_offset,
-                        )
-                        .map_err(|_| Signal::SIGBUS)?;
-
-                    if nread < page.len() {
-                        page.as_cached().as_mut_slice::<u8>(0x1000)[nread..].fill(0);
-                    }
-
-                    if mapping.length - load_offset < 0x1000 {
-                        let length_to_end = mapping.length - load_offset;
-                        page.as_cached().as_mut_slice::<u8>(0x1000)[length_to_end..].fill(0);
-                    }
-                }
-                // Otherwise, the page is kept zero emptied.
-
-                attributes &= !PA_MMAP as usize;
-                pte.set_attributes(attributes);
-            } else {
-                panic!("Anonymous mapping should not be PA_MMAP");
-            }
-        }
-
-        Ok(())
+        area.handle(pte, addr.floor() - area.range().start())
+            .map_err(|_| Signal::SIGBUS)
     }
 }
 

+ 18 - 4
src/kernel/mem/page_table.rs

@@ -14,10 +14,13 @@ use super::{MMArea, Permission};
 const PA_P: usize = 0x001;
 const PA_RW: usize = 0x002;
 const PA_US: usize = 0x004;
+#[allow(dead_code)]
 const PA_PWT: usize = 0x008;
+#[allow(dead_code)]
 const PA_PCD: usize = 0x010;
 const PA_A: usize = 0x020;
 const PA_D: usize = 0x040;
+#[allow(dead_code)]
 const PA_PS: usize = 0x080;
 const PA_G: usize = 0x100;
 const PA_COW: usize = 0x200;
@@ -35,6 +38,7 @@ pub struct PageTable {
     page: Page,
 }
 
+#[allow(dead_code)]
 pub struct PTEIterator<'lt, const KERNEL: bool> {
     count: usize,
     i4: u16,
@@ -68,6 +72,14 @@ impl PTE {
         self.0 & PA_P != 0
     }
 
+    pub fn is_cow(&self) -> bool {
+        self.0 & PA_COW != 0
+    }
+
+    pub fn is_mmap(&self) -> bool {
+        self.0 & PA_MMAP != 0
+    }
+
     pub fn pfn(&self) -> usize {
         self.0 & !PA_MASK
     }
@@ -80,6 +92,7 @@ impl PTE {
         self.0 = pfn | attributes;
     }
 
+    #[allow(dead_code)]
     pub fn set_pfn(&mut self, pfn: usize) {
         self.set(pfn, self.attributes())
     }
@@ -212,18 +225,19 @@ impl PageTable {
         Self { page }
     }
 
+    pub fn root_page_table(&self) -> usize {
+        self.page.as_phys()
+    }
+
     pub fn iter_user(&self, range: VRange) -> KResult<PTEIterator<'_, false>> {
         PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
+    #[allow(dead_code)]
     pub fn iter_kernel(&self, range: VRange) -> KResult<PTEIterator<'_, true>> {
         PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
-    pub fn switch(&self) {
-        arch::set_root_page_table(self.page.as_phys())
-    }
-
     pub fn unmap(&self, area: &MMArea) {
         let range = area.range();
         let use_invlpg = range.len() / 4096 < 4;

+ 11 - 34
src/kernel/mem/paging.rs

@@ -1,7 +1,6 @@
 use super::address::PFN;
 use super::page_alloc::{alloc_page, alloc_pages, free_pages, PagePtr};
 use super::phys::PhysPtr;
-use crate::bindings::root::EFAULT;
 use crate::io::{Buffer, FillResult};
 use crate::kernel::mem::phys;
 use core::fmt;
@@ -81,12 +80,16 @@ impl Page {
         phys::NoCachePP::new(self.as_phys())
     }
 
-    pub fn zero(&self) {
-        use phys::PhysPtr;
+    pub fn as_slice<'r, 'lt>(&'r self) -> &'lt [u8] {
+        self.as_cached().as_slice(self.len())
+    }
 
-        unsafe {
-            core::ptr::write_bytes(self.as_cached().as_ptr::<u8>(), 0, self.len());
-        }
+    pub fn as_mut_slice<'r, 'lt>(&'r self) -> &'lt mut [u8] {
+        self.as_cached().as_mut_slice(self.len())
+    }
+
+    pub fn zero(&self) {
+        self.as_mut_slice().fill(0);
     }
 
     /// # Safety
@@ -160,16 +163,11 @@ impl PageBuffer {
     }
 
     pub fn as_slice(&self) -> &[u8] {
-        unsafe { core::slice::from_raw_parts(self.page.as_cached().as_ptr::<u8>(), self.offset) }
+        self.page.as_slice()
     }
 
     fn available_as_slice(&self) -> &mut [u8] {
-        unsafe {
-            core::slice::from_raw_parts_mut(
-                self.page.as_cached().as_ptr::<u8>().add(self.offset),
-                self.remaining(),
-            )
-        }
+        &mut self.page.as_mut_slice()[self.offset..]
     }
 }
 
@@ -198,24 +196,3 @@ impl Buffer for PageBuffer {
         }
     }
 }
-
-/// Copy data from a slice to a `Page`
-///
-/// DONT USE THIS FUNCTION TO COPY DATA TO MMIO ADDRESSES
-///
-/// # Returns
-///
-/// Returns `Err(EFAULT)` if the slice is larger than the page
-/// Returns `Ok(())` otherwise
-pub fn copy_to_page(src: &[u8], dst: &Page) -> Result<(), u32> {
-    use phys::PhysPtr;
-    if src.len() > dst.len() {
-        return Err(EFAULT);
-    }
-
-    unsafe {
-        core::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_cached().as_ptr(), src.len());
-    }
-
-    Ok(())
-}

+ 1 - 0
src/kernel/mem/phys.rs

@@ -3,6 +3,7 @@ use core::fmt;
 pub trait PhysPtr {
     fn as_ptr<T>(&self) -> *mut T;
 
+    #[allow(dead_code)]
     fn as_ref<'lifetime, T>(&self) -> &'lifetime T {
         unsafe { &*(self.as_ptr()) }
     }

+ 3 - 0
src/kernel/syscall.rs

@@ -181,6 +181,7 @@ use super::task::Thread;
 
 pub(self) use {arg_register, define_syscall32, format_expand, register_syscall, syscall32_call};
 
+#[allow(dead_code)]
 pub(self) struct SyscallHandler {
     handler: fn(&mut InterruptContext, &mut ExtendedContext) -> usize,
     name: &'static str,
@@ -192,6 +193,7 @@ pub(self) fn register_syscall_handler(
     name: &'static str,
 ) {
     // SAFETY: `SYSCALL_HANDLERS` is never modified after initialization.
+    #[allow(static_mut_refs)]
     let syscall = unsafe { SYSCALL_HANDLERS.get_mut(no) }.unwrap();
     assert!(
         syscall.replace(SyscallHandler { handler, name }).is_none(),
@@ -218,6 +220,7 @@ pub fn handle_syscall32(
     ext_ctx: &mut ExtendedContext,
 ) {
     // SAFETY: `SYSCALL_HANDLERS` is never modified after initialization.
+    #[allow(static_mut_refs)]
     let syscall = unsafe { SYSCALL_HANDLERS.get(no) }.and_then(Option::as_ref);
 
     match syscall {

+ 35 - 15
src/kernel/syscall/file_rw.rs

@@ -8,6 +8,7 @@ use bindings::{
 use crate::{
     io::{Buffer, BufferFill},
     kernel::{
+        constants::AT_EMPTY_PATH,
         task::Thread,
         user::{
             dataflow::{CheckedUserPointer, UserBuffer, UserString},
@@ -66,14 +67,18 @@ fn do_dup2(old_fd: u32, new_fd: u32) -> KResult<u32> {
     files.dup_to(old_fd, new_fd, 0)
 }
 
-fn do_pipe(pipe_fd: *mut [u32; 2]) -> KResult<()> {
+fn do_pipe2(pipe_fd: *mut [u32; 2], flags: u32) -> KResult<()> {
     let mut buffer = UserBuffer::new(pipe_fd as *mut u8, core::mem::size_of::<[u32; 2]>())?;
     let files = FileArray::get_current();
-    let (read_fd, write_fd) = files.pipe()?;
+    let (read_fd, write_fd) = files.pipe(flags)?;
 
     buffer.copy(&[read_fd, write_fd])?.ok_or(EFAULT)
 }
 
+fn do_pipe(pipe_fd: *mut [u32; 2]) -> KResult<()> {
+    do_pipe2(pipe_fd, 0)
+}
+
 fn do_getdents(fd: u32, buffer: *mut u8, bufsize: usize) -> KResult<usize> {
     let mut buffer = UserBuffer::new(buffer, bufsize)?;
     let files = FileArray::get_current();
@@ -95,23 +100,36 @@ fn do_statx(dirfd: u32, path: *const u8, flags: u32, mask: u32, buffer: *mut u8)
         unimplemented!("AT_STATX_SYNC_TYPE={:x}", flags & AT_STATX_SYNC_TYPE);
     }
 
-    if dirfd != AT_FDCWD as u32 {
-        unimplemented!("dirfd={}", dirfd);
-    }
-
-    let path = UserString::new(path)?;
-    let path = Path::new(path.as_cstr().to_bytes())?;
+    let mut stat: statx = unsafe { MaybeUninit::zeroed().assume_init() };
     let mut buffer = UserBuffer::new(buffer, core::mem::size_of::<statx>())?;
 
-    let file = Dentry::open(
-        &FsContext::get_current(),
-        path,
-        (flags & AT_SYMLINK_NOFOLLOW) != AT_SYMLINK_NOFOLLOW,
-    )?;
+    if (flags & AT_EMPTY_PATH) != 0 {
+        let file = FileArray::get_current().get(dirfd).ok_or(EBADF)?;
+        file.statx(&mut stat, mask)?;
+    } else {
+        let path = UserString::new(path)?;
+        let path = Path::new(path.as_cstr().to_bytes())?;
+
+        let file;
+        if dirfd != AT_FDCWD as u32 && !path.is_absolute() {
+            let at = FileArray::get_current().get(dirfd).ok_or(EBADF)?;
+            file = Dentry::open_at(
+                &FsContext::get_current(),
+                at.as_path().ok_or(EBADF)?,
+                path,
+                (flags & AT_SYMLINK_NOFOLLOW) != AT_SYMLINK_NOFOLLOW,
+            )?;
+        } else {
+            file = Dentry::open(
+                &FsContext::get_current(),
+                path,
+                (flags & AT_SYMLINK_NOFOLLOW) != AT_SYMLINK_NOFOLLOW,
+            )?;
+        }
 
-    let mut stat: statx = unsafe { MaybeUninit::zeroed().assume_init() };
+        file.statx(&mut stat, mask)?;
+    }
 
-    file.statx(&mut stat, mask)?;
     buffer.copy(&stat)?.ok_or(EFAULT)
 }
 
@@ -356,6 +374,7 @@ define_syscall32!(sys_close, do_close, fd: u32);
 define_syscall32!(sys_dup, do_dup, fd: u32);
 define_syscall32!(sys_dup2, do_dup2, old_fd: u32, new_fd: u32);
 define_syscall32!(sys_pipe, do_pipe, pipe_fd: *mut [u32; 2]);
+define_syscall32!(sys_pipe2, do_pipe2, pipe_fd: *mut [u32; 2], flags: u32);
 define_syscall32!(sys_getdents, do_getdents, fd: u32, buffer: *mut u8, bufsize: usize);
 define_syscall32!(sys_getdents64, do_getdents64, fd: u32, buffer: *mut u8, bufsize: usize);
 define_syscall32!(sys_statx, do_statx, fd: u32, path: *const u8, flags: u32, mask: u32, buffer: *mut u8);
@@ -398,5 +417,6 @@ pub(super) fn register() {
     register_syscall!(0xdc, getdents64);
     register_syscall!(0xdd, fcntl64);
     register_syscall!(0xef, sendfile64);
+    register_syscall!(0x14b, pipe2);
     register_syscall!(0x17f, statx);
 }

+ 7 - 0
src/kernel/syscall/mm.rs

@@ -108,8 +108,14 @@ impl MapArgument<'_, UserMmapFlags> for MapArgumentImpl {
     }
 }
 
+#[allow(unused_variables)]
+fn do_madvise(addr: usize, len: usize, advice: u32) -> KResult<()> {
+    Ok(())
+}
+
 define_syscall32!(sys_brk, do_brk, addr: usize);
 define_syscall32!(sys_munmap, do_munmap, addr: usize, len: usize);
+define_syscall32!(sys_madvise, do_madvise, addr: usize, len: usize, advice: u32);
 define_syscall32!(sys_mmap_pgoff, do_mmap_pgoff,
     addr: usize, len: usize,
     prot: UserMmapProtocol,
@@ -120,4 +126,5 @@ pub(super) fn register() {
     register_syscall!(0x2d, brk);
     register_syscall!(0x5b, munmap);
     register_syscall!(0xc0, mmap_pgoff);
+    register_syscall!(0xdb, madvise);
 }

+ 133 - 9
src/kernel/syscall/procops.rs

@@ -1,13 +1,15 @@
 use alloc::borrow::ToOwned;
 use alloc::ffi::CString;
 use arch::{ExtendedContext, InterruptContext};
-use bindings::{EINVAL, ENOENT, ENOTDIR, ESRCH};
+use bindings::{EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH};
 use bitflags::bitflags;
 
 use crate::elf::ParsedElf32;
 use crate::io::Buffer;
-use crate::kernel::constants::{PR_GET_NAME, PR_SET_NAME, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK};
-use crate::kernel::mem::VAddr;
+use crate::kernel::constants::{
+    ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK,
+};
+use crate::kernel::mem::{Page, PageBuffer, VAddr};
 use crate::kernel::task::{
     ProcessList, Scheduler, Signal, SignalAction, Thread, UserDescriptor, WaitObject, WaitType,
 };
@@ -20,6 +22,7 @@ use crate::{kernel::user::dataflow::UserBuffer, prelude::*};
 
 use crate::kernel::vfs::{self, FsContext};
 
+use super::sysinfo::TimeVal;
 use super::{define_syscall32, register_syscall};
 
 fn do_umask(mask: u32) -> KResult<u32> {
@@ -33,9 +36,12 @@ fn do_umask(mask: u32) -> KResult<u32> {
 
 fn do_getcwd(buffer: *mut u8, bufsize: usize) -> KResult<usize> {
     let context = FsContext::get_current();
-    let mut buffer = UserBuffer::new(buffer, bufsize)?;
+    let mut user_buffer = UserBuffer::new(buffer, bufsize)?;
 
+    let page = Page::alloc_one();
+    let mut buffer = PageBuffer::new(page.clone());
     context.cwd.lock().get_path(&context, &mut buffer)?;
+    user_buffer.fill(page.as_slice())?.ok_or(ERANGE)?;
 
     Ok(buffer.wrote())
 }
@@ -89,8 +95,9 @@ fn do_execve(exec: &[u8], argv: Vec<CString>, envp: Vec<CString>) -> KResult<(VA
     // TODO: When `execve` is called by one of the threads in a process, the other threads
     //       should be terminated and `execve` is performed in the thread group leader.
     let elf = ParsedElf32::parse(dentry.clone())?;
-    let result = elf.load(&Thread::current().process.mm_list, argv, envp);
-    if let Ok((ip, sp)) = result {
+    let result = elf.load(argv, envp);
+    if let Ok((ip, sp, mm_list)) = result {
+        Thread::current().process.mm_list.replace(mm_list);
         Thread::current().files.on_exec();
         Thread::current().signal_list.clear_non_ignore();
         Thread::current().set_name(dentry.name().clone());
@@ -169,9 +176,9 @@ bitflags! {
 }
 
 fn do_waitpid(waitpid: u32, arg1: *mut u32, options: u32) -> KResult<u32> {
-    if waitpid != u32::MAX {
-        unimplemented!("waitpid with pid {waitpid}")
-    }
+    // if waitpid != u32::MAX {
+    //     unimplemented!("waitpid with pid {waitpid}")
+    // }
     let options = match UserWaitOptions::from_bits(options) {
         None => unimplemented!("waitpid with options {options}"),
         Some(options) => options,
@@ -429,6 +436,109 @@ fn do_rt_sigaction(
     Ok(())
 }
 
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+struct RLimit {
+    rlim_cur: u64,
+    rlim_max: u64,
+}
+
+fn do_prlimit64(
+    pid: u32,
+    resource: u32,
+    new_limit: *const RLimit,
+    old_limit: *mut RLimit,
+) -> KResult<()> {
+    if pid != 0 {
+        return Err(ENOSYS);
+    }
+
+    match resource {
+        RLIMIT_STACK => {
+            if !old_limit.is_null() {
+                let old_limit = UserPointerMut::new(old_limit)?;
+                let rlimit = RLimit {
+                    rlim_cur: 8 * 1024 * 1024,
+                    rlim_max: 8 * 1024 * 1024,
+                };
+                old_limit.write(rlimit)?;
+            }
+
+            if !new_limit.is_null() {
+                return Err(ENOSYS);
+            }
+            Ok(())
+        }
+        _ => Err(ENOSYS),
+    }
+}
+
+fn do_getrlimit(resource: u32, rlimit: *mut RLimit) -> KResult<()> {
+    do_prlimit64(0, resource, core::ptr::null(), rlimit)
+}
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct RUsage {
+    ru_utime: TimeVal,
+    ru_stime: TimeVal,
+    ru_maxrss: u32,
+    ru_ixrss: u32,
+    ru_idrss: u32,
+    ru_isrss: u32,
+    ru_minflt: u32,
+    ru_majflt: u32,
+    ru_nswap: u32,
+    ru_inblock: u32,
+    ru_oublock: u32,
+    ru_msgsnd: u32,
+    ru_msgrcv: u32,
+    ru_nsignals: u32,
+    ru_nvcsw: u32,
+    ru_nivcsw: u32,
+}
+
+fn do_getrusage(who: u32, rusage: *mut RUsage) -> KResult<()> {
+    if who != 0 {
+        return Err(ENOSYS);
+    }
+
+    let rusage = UserPointerMut::new(rusage)?;
+    rusage.write(RUsage {
+        ru_utime: TimeVal::default(),
+        ru_stime: TimeVal::default(),
+        ru_maxrss: 0,
+        ru_ixrss: 0,
+        ru_idrss: 0,
+        ru_isrss: 0,
+        ru_minflt: 0,
+        ru_majflt: 0,
+        ru_nswap: 0,
+        ru_inblock: 0,
+        ru_oublock: 0,
+        ru_msgsnd: 0,
+        ru_msgrcv: 0,
+        ru_nsignals: 0,
+        ru_nvcsw: 0,
+        ru_nivcsw: 0,
+    })?;
+
+    Ok(())
+}
+
+fn do_chmod(pathname: *const u8, mode: u32) -> KResult<()> {
+    let context = FsContext::get_current();
+    let path = UserString::new(pathname)?;
+    let path = Path::new(path.as_cstr().to_bytes())?;
+
+    let dentry = Dentry::open(&context, path, true)?;
+    if !dentry.is_valid() {
+        return Err(ENOENT);
+    }
+
+    dentry.chmod(mode)
+}
+
 define_syscall32!(sys_chdir, do_chdir, path: *const u8);
 define_syscall32!(sys_umask, do_umask, mask: u32);
 define_syscall32!(sys_getcwd, do_getcwd, buffer: *mut u8, bufsize: usize);
@@ -457,6 +567,15 @@ define_syscall32!(sys_rt_sigprocmask, do_rt_sigprocmask,
     how: u32, set: *mut u64, oldset: *mut u64, sigsetsize: usize);
 define_syscall32!(sys_rt_sigaction, do_rt_sigaction,
     signum: u32, act: *const UserSignalAction, oldact: *mut UserSignalAction, sigsetsize: usize);
+define_syscall32!(sys_prlimit64, do_prlimit64,
+    pid: u32, resource: u32, new_limit: *const RLimit, old_limit: *mut RLimit);
+define_syscall32!(sys_getrlimit, do_getrlimit, resource: u32, rlimit: *mut RLimit);
+define_syscall32!(sys_getrusage, do_getrusage, who: u32, rlimit: *mut RUsage);
+define_syscall32!(sys_chmod, do_chmod, pathname: *const u8, mode: u32);
+
+fn sys_vfork(int_stack: &mut InterruptContext, ext: &mut ExtendedContext) -> usize {
+    sys_fork(int_stack, ext)
+}
 
 fn sys_fork(int_stack: &mut InterruptContext, _: &mut ExtendedContext) -> usize {
     let mut procs = ProcessList::get().lock();
@@ -487,6 +606,7 @@ pub(super) fn register() {
     register_syscall!(0x07, waitpid);
     register_syscall!(0x0b, execve);
     register_syscall!(0x0c, chdir);
+    register_syscall!(0x0f, chmod);
     register_syscall!(0x14, getpid);
     register_syscall!(0x15, mount);
     register_syscall!(0x25, kill);
@@ -495,6 +615,7 @@ pub(super) fn register() {
     register_syscall!(0x3c, umask);
     register_syscall!(0x40, getppid);
     register_syscall!(0x42, setsid);
+    register_syscall!(0x4d, getrusage);
     register_syscall!(0x72, wait4);
     register_syscall!(0x77, sigreturn);
     register_syscall!(0x84, getpgid);
@@ -503,6 +624,8 @@ pub(super) fn register() {
     register_syscall!(0xae, rt_sigaction);
     register_syscall!(0xaf, rt_sigprocmask);
     register_syscall!(0xb7, getcwd);
+    register_syscall!(0xbe, vfork);
+    register_syscall!(0xbf, getrlimit);
     register_syscall!(0xc7, getuid);
     register_syscall!(0xc8, getgid);
     register_syscall!(0xc9, geteuid);
@@ -512,5 +635,6 @@ pub(super) fn register() {
     register_syscall!(0xf3, set_thread_area);
     register_syscall!(0xfc, exit);
     register_syscall!(0x102, set_tid_address);
+    register_syscall!(0x154, prlimit64);
     register_syscall!(0x180, arch_prctl);
 }

+ 65 - 3
src/kernel/syscall/sysinfo.rs

@@ -49,14 +49,16 @@ fn do_newuname(buffer: *mut NewUTSName) -> KResult<()> {
     buffer.write(uname)
 }
 
-#[derive(Clone, Copy)]
-struct TimeVal {
+#[allow(dead_code)]
+#[derive(Default, Clone, Copy)]
+pub struct TimeVal {
     sec: u64,
     usec: u64,
 }
 
+#[allow(dead_code)]
 #[derive(Clone, Copy)]
-struct TimeSpec {
+pub struct TimeSpec {
     sec: u64,
     nsec: u64,
 }
@@ -91,12 +93,72 @@ fn do_clock_gettime64(clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> {
     })
 }
 
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct Sysinfo {
+    uptime: u32,
+    loads: [u32; 3],
+    totalram: u32,
+    freeram: u32,
+    sharedram: u32,
+    bufferram: u32,
+    totalswap: u32,
+    freeswap: u32,
+    procs: u16,
+    totalhigh: u32,
+    freehigh: u32,
+    mem_unit: u32,
+    _padding: [u8; 8],
+}
+
+fn do_sysinfo(info: *mut Sysinfo) -> KResult<()> {
+    let info = UserPointerMut::new(info)?;
+    info.write(Sysinfo {
+        uptime: ticks().in_secs() as u32,
+        loads: [0; 3],
+        totalram: 100,
+        freeram: 50,
+        sharedram: 0,
+        bufferram: 0,
+        totalswap: 0,
+        freeswap: 0,
+        procs: 10,
+        totalhigh: 0,
+        freehigh: 0,
+        mem_unit: 1024,
+        _padding: [0; 8],
+    })
+}
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct TMS {
+    tms_utime: u32,
+    tms_stime: u32,
+    tms_cutime: u32,
+    tms_cstime: u32,
+}
+
+fn do_times(tms: *mut TMS) -> KResult<()> {
+    let tms = UserPointerMut::new(tms)?;
+    tms.write(TMS {
+        tms_utime: 0,
+        tms_stime: 0,
+        tms_cutime: 0,
+        tms_cstime: 0,
+    })
+}
+
 define_syscall32!(sys_newuname, do_newuname, buffer: *mut NewUTSName);
 define_syscall32!(sys_gettimeofday, do_gettimeofday, timeval: *mut TimeVal, timezone: *mut ());
 define_syscall32!(sys_clock_gettime64, do_clock_gettime64, clock_id: u32, timespec: *mut TimeSpec);
+define_syscall32!(sys_sysinfo, do_sysinfo, info: *mut Sysinfo);
+define_syscall32!(sys_times, do_times, tms: *mut TMS);
 
 pub(super) fn register() {
+    register_syscall!(0x2b, times);
     register_syscall!(0x4e, gettimeofday);
+    register_syscall!(0x74, sysinfo);
     register_syscall!(0x7a, newuname);
     register_syscall!(0x193, clock_gettime64);
 }

+ 1 - 0
src/kernel/task/kstack.rs

@@ -4,6 +4,7 @@ use crate::kernel::{
 };
 use arch::InterruptContext;
 
+#[allow(dead_code)]
 pub struct KernelStack {
     pages: Page,
     bottom: usize,

+ 1 - 1
src/kernel/task/process.rs

@@ -29,7 +29,7 @@ pub struct Process {
     pub pid: u32,
 
     pub wait_list: WaitList,
-    pub mm_list: Arc<MMList>,
+    pub mm_list: MMList,
 
     /// Parent process
     ///

+ 7 - 2
src/kernel/task/process_list.rs

@@ -2,6 +2,7 @@ use alloc::{
     collections::btree_map::BTreeMap,
     sync::{Arc, Weak},
 };
+use bindings::KERNEL_PML4;
 
 use crate::{
     prelude::*,
@@ -145,8 +146,12 @@ impl ProcessList {
             }
         }
 
-        // Unmap all user memory areas
-        process.mm_list.clear_user();
+        // Release the MMList as well as the page table.
+        // Before we release the page table, we need to switch to the kernel page table.
+        arch::set_root_page_table(KERNEL_PML4 as usize);
+        unsafe {
+            process.mm_list.release();
+        }
 
         // Make children orphans (adopted by init)
         {

+ 8 - 0
src/kernel/terminal.rs

@@ -28,16 +28,24 @@ const VQUIT: usize = 1;
 const VERASE: usize = 2;
 const VKILL: usize = 3;
 const VEOF: usize = 4;
+#[allow(dead_code)]
 const VTIME: usize = 5;
 const VMIN: usize = 6;
+#[allow(dead_code)]
 const VSWTC: usize = 7;
+#[allow(dead_code)]
 const VSTART: usize = 8;
+#[allow(dead_code)]
 const VSTOP: usize = 9;
 const VSUSP: usize = 10;
 const VEOL: usize = 11;
+#[allow(dead_code)]
 const VREPRINT: usize = 12;
+#[allow(dead_code)]
 const VDISCARD: usize = 13;
+#[allow(dead_code)]
 const VWERASE: usize = 14;
+#[allow(dead_code)]
 const VLNEXT: usize = 15;
 const VEOL2: usize = 16;
 

+ 1 - 0
src/kernel/timer.rs

@@ -13,6 +13,7 @@ impl Ticks {
         self.0 / 100
     }
 
+    #[allow(dead_code)]
     pub fn in_msecs(&self) -> usize {
         self.0 * 10
     }

+ 1 - 0
src/kernel/user.rs

@@ -1,5 +1,6 @@
 pub mod dataflow;
 
+#[allow(unused_imports)]
 pub use dataflow::{UserBuffer, UserString};
 
 pub type UserPointer<'a, T> = dataflow::UserPointer<'a, T, true>;

+ 14 - 0
src/kernel/vfs/dentry.rs

@@ -60,6 +60,7 @@ impl core::fmt::Debug for Dentry {
 }
 
 const D_DIRECTORY: u64 = 1;
+#[allow(dead_code)]
 const D_MOUNTPOINT: u64 = 2;
 const D_SYMLINK: u64 = 4;
 const D_REGULAR: u64 = 8;
@@ -310,6 +311,15 @@ impl Dentry {
         Dentry::open_recursive(context, &cwd, path, follow_symlinks, 0)
     }
 
+    pub fn open_at(
+        context: &FsContext,
+        at: &Arc<Self>,
+        path: Path,
+        follow_symlinks: bool,
+    ) -> KResult<Arc<Self>> {
+        Dentry::open_recursive(context, at, path, follow_symlinks, 0)
+    }
+
     pub fn get_path(
         self: &Arc<Dentry>,
         context: &FsContext,
@@ -423,4 +433,8 @@ impl Dentry {
             self.parent.get_inode().unwrap().mknod(self, mode, devid)
         }
     }
+
+    pub fn chmod(&self, mode: Mode) -> KResult<()> {
+        self.get_inode()?.chmod(mode)
+    }
 }

+ 22 - 6
src/kernel/vfs/file.rs

@@ -1,10 +1,10 @@
 use core::{ops::ControlFlow, sync::atomic::Ordering};
 
 use crate::{
-    io::{Buffer, BufferFill, RawBuffer},
+    io::{Buffer, BufferFill, ByteBuffer},
     kernel::{
         constants::{TCGETS, TCSETS, TIOCGPGRP, TIOCGWINSZ, TIOCSPGRP},
-        mem::{paging::Page, phys::PhysPtr},
+        mem::paging::Page,
         task::{Signal, Thread},
         terminal::{Terminal, TerminalIORequest},
         user::{UserPointer, UserPointerMut},
@@ -15,13 +15,15 @@ use crate::{
 };
 
 use alloc::{collections::vec_deque::VecDeque, sync::Arc};
-use bindings::{EBADF, EFAULT, EINTR, EINVAL, ENOTDIR, ENOTTY, EOVERFLOW, EPIPE, ESPIPE, S_IFMT};
+use bindings::{
+    statx, EBADF, EFAULT, EINTR, EINVAL, ENOTDIR, ENOTTY, EOVERFLOW, EPIPE, ESPIPE, S_IFMT,
+};
 use bitflags::bitflags;
 
 use super::{
     dentry::Dentry,
     inode::{Mode, WriteOffset},
-    s_isblk, s_isreg,
+    s_isblk, s_isdir, s_isreg,
 };
 
 pub struct InodeFile {
@@ -518,8 +520,8 @@ impl File {
             }
 
             let batch_size = usize::min(count - tot, buffer_page.len());
-            let slice = buffer_page.as_cached().as_mut_slice::<u8>(batch_size);
-            let mut buffer = RawBuffer::new_from_slice(slice);
+            let slice = &mut buffer_page.as_mut_slice()[..batch_size];
+            let mut buffer = ByteBuffer::new(slice);
 
             let nwrote = self.read(&mut buffer)?;
 
@@ -547,4 +549,18 @@ impl File {
             _ => unimplemented!("Poll event not supported."),
         }
     }
+
+    pub fn statx(&self, buffer: &mut statx, mask: u32) -> KResult<()> {
+        match self {
+            File::Inode(inode) => inode.dentry.statx(buffer, mask),
+            _ => Err(EBADF),
+        }
+    }
+
+    pub fn as_path(&self) -> Option<&Arc<Dentry>> {
+        match self {
+            File::Inode(inode_file) if s_isdir(inode_file.mode) => Some(&inode_file.dentry),
+            _ => None,
+        }
+    }
 }

+ 6 - 3
src/kernel/vfs/filearray.rs

@@ -70,6 +70,7 @@ impl FileArray {
         })
     }
 
+    #[allow(dead_code)]
     pub fn new_shared(other: &Arc<Self>) -> Arc<Self> {
         other.clone()
     }
@@ -154,16 +155,18 @@ impl FileArray {
 
     /// # Return
     /// `(read_fd, write_fd)`
-    pub fn pipe(&self) -> KResult<(FD, FD)> {
+    pub fn pipe(&self, flags: u32) -> KResult<(FD, FD)> {
         let mut inner = self.inner.lock();
 
         let read_fd = inner.next_fd();
         let write_fd = inner.next_fd();
 
+        let fdflag = if flags & O_CLOEXEC != 0 { FD_CLOEXEC } else { 0 };
+
         let pipe = Pipe::new();
         let (read_end, write_end) = pipe.split();
-        inner.do_insert(read_fd, 0, read_end);
-        inner.do_insert(write_fd, 0, write_end);
+        inner.do_insert(read_fd, fdflag as u64, read_end);
+        inner.do_insert(write_fd, fdflag as u64, write_end);
 
         Ok((read_fd, write_fd))
     }

+ 4 - 0
src/kernel/vfs/inode.rs

@@ -138,6 +138,10 @@ pub trait Inode: Send + Sync + InodeInner {
         Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
+    fn chmod(&self, mode: Mode) -> KResult<()> {
+        Err(EPERM)
+    }
+
     fn statx(&self, stat: &mut statx, mask: u32) -> KResult<()> {
         // Safety: ffi should have checked reference
         let vfs = self.vfs.upgrade().expect("Vfs is dropped");

+ 1 - 0
src/kernel/vfs/mod.rs

@@ -71,6 +71,7 @@ impl FsContext {
         })
     }
 
+    #[allow(dead_code)]
     pub fn new_shared(other: &Arc<Self>) -> Arc<Self> {
         other.clone()
     }

+ 6 - 1
src/kernel/vfs/mount.rs

@@ -40,6 +40,7 @@ lazy_static! {
 
 static mut ROOTFS: Option<Arc<Dentry>> = None;
 
+#[allow(dead_code)]
 pub struct Mount {
     vfs: Arc<dyn Vfs>,
     root: Arc<Dentry>,
@@ -79,6 +80,7 @@ pub fn register_filesystem(fstype: &str, creator: Arc<dyn MountCreator>) -> KRes
     }
 }
 
+#[allow(dead_code)]
 struct MountPointData {
     mount: Mount,
     source: String,
@@ -198,6 +200,9 @@ pub fn init_vfs() -> KResult<()> {
 
 impl Dentry {
     pub fn kernel_root_dentry() -> Arc<Dentry> {
-        unsafe { ROOTFS.as_ref().cloned().unwrap() }
+        #[allow(static_mut_refs)]
+        unsafe {
+            ROOTFS.as_ref().cloned().unwrap()
+        }
     }
 }

+ 1 - 0
src/kernel/vfs/vfs.rs

@@ -2,6 +2,7 @@ use crate::prelude::*;
 
 use super::DevId;
 
+#[allow(dead_code)]
 pub trait Vfs: Send + Sync + AsAny {
     fn io_blksize(&self) -> usize;
     fn fs_devid(&self) -> DevId;

+ 3 - 3
src/lib.rs

@@ -164,7 +164,7 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
 
     unsafe { kernel::smp::bootstrap_smp() };
 
-    let (ip, sp) = {
+    let (ip, sp, mm_list) = {
         // mount fat32 /mnt directory
         let fs_context = FsContext::get_current();
         let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
@@ -197,10 +197,10 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
         ];
 
         let elf = ParsedElf32::parse(init.clone()).unwrap();
-        elf.load(&Thread::current().process.mm_list, argv, envp)
-            .unwrap()
+        elf.load(argv, envp).unwrap()
     };
 
+    Thread::current().process.mm_list.replace(mm_list);
     Thread::current().files.open_console();
 
     unsafe {

+ 3 - 5
src/net/netdev.rs

@@ -20,6 +20,7 @@ pub enum LinkSpeed {
 
 pub type Mac = [u8; 6];
 
+#[allow(dead_code)]
 pub trait Netdev: Send {
     fn up(&mut self) -> Result<(), u32>;
     fn send(&mut self, data: &[u8]) -> Result<(), u32>;
@@ -54,8 +55,7 @@ impl Ord for dyn Netdev {
 
 lazy_static! {
     static ref NETDEVS_ID: Spin<u32> = Spin::new(0);
-    static ref NETDEVS: Spin<BTreeMap<u32, Arc<Mutex<dyn Netdev>>>> =
-        Spin::new(BTreeMap::new());
+    static ref NETDEVS: Spin<BTreeMap<u32, Arc<Mutex<dyn Netdev>>>> = Spin::new(BTreeMap::new());
 }
 
 pub fn alloc_id() -> u32 {
@@ -66,9 +66,7 @@ pub fn alloc_id() -> u32 {
     retval
 }
 
-pub fn register_netdev(
-    netdev: impl Netdev + 'static,
-) -> Result<Arc<Mutex<dyn Netdev>>, u32> {
+pub fn register_netdev(netdev: impl Netdev + 'static) -> Result<Arc<Mutex<dyn Netdev>>, u32> {
     let devid = netdev.id();
 
     let mut netdevs = NETDEVS.lock();

+ 9 - 0
src/path.rs

@@ -2,6 +2,8 @@ use crate::prelude::*;
 
 use bindings::ENOENT;
 
+use core::fmt::{self, Debug, Formatter};
+
 pub struct Path<'lt> {
     all: &'lt [u8],
 }
@@ -10,6 +12,7 @@ pub struct PathIterator<'lt> {
     rem: &'lt [u8],
 }
 
+#[allow(dead_code)]
 impl<'lt> Path<'lt> {
     pub fn new(all: &'lt [u8]) -> KResult<Self> {
         if all.is_empty() {
@@ -78,3 +81,9 @@ impl<'lt> Iterator for PathIterator<'lt> {
         }
     }
 }
+
+impl Debug for Path<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        write!(f, "Path({:?})", self.all)
+    }
+}

+ 3 - 0
src/prelude.rs

@@ -32,6 +32,7 @@ pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec};
 pub(crate) use core::{any::Any, fmt::Write, marker::PhantomData, str};
 use core::{mem::ManuallyDrop, ops::Deref};
 
+#[allow(unused_imports)]
 pub use crate::sync::{Locked, Mutex, RwSemaphore, Semaphore, Spin};
 
 pub struct BorrowedArc<'lt, T: ?Sized> {
@@ -48,6 +49,7 @@ impl<'lt, T: ?Sized> BorrowedArc<'lt, T> {
         }
     }
 
+    #[allow(dead_code)]
     pub fn new(ptr: &'lt *const T) -> Self {
         assert!(!ptr.is_null());
         Self {
@@ -81,6 +83,7 @@ impl<'lt, T: ?Sized> AsRef<Arc<T>> for BorrowedArc<'lt, T> {
     }
 }
 
+#[allow(dead_code)]
 pub trait AsAny: Send + Sync {
     fn as_any(&self) -> &dyn Any;
     fn as_any_mut(&mut self) -> &mut dyn Any;

+ 1 - 0
src/rcu.rs

@@ -13,6 +13,7 @@ use alloc::sync::Arc;
 
 use lazy_static::lazy_static;
 
+#[allow(dead_code)]
 pub struct RCUReadGuard<'data, T: 'data> {
     value: T,
     guard: Guard<'data, (), RwSemaphoreStrategy, false>,

+ 2 - 0
src/sync.rs

@@ -1,3 +1,4 @@
+mod arcswap;
 mod condvar;
 pub mod lock;
 mod locked;
@@ -85,5 +86,6 @@ macro_rules! might_sleep {
     };
 }
 
+pub use arcswap::ArcSwap;
 pub use locked::{AsRefMutPosition, AsRefPosition, Locked, RefMutPosition, RefPosition};
 pub(crate) use might_sleep;

+ 49 - 0
src/sync/arcswap.rs

@@ -0,0 +1,49 @@
+use core::{
+    fmt::{self, Debug, Formatter},
+    sync::atomic::{AtomicPtr, Ordering},
+};
+
+use alloc::sync::Arc;
+
+use crate::BorrowedArc;
+
+unsafe impl<T> Send for ArcSwap<T> where T: Send + Sync {}
+unsafe impl<T> Sync for ArcSwap<T> where T: Send + Sync {}
+
+pub struct ArcSwap<T> {
+    pointer: AtomicPtr<T>,
+}
+
+impl<T> ArcSwap<T> {
+    pub fn new(data: T) -> Self {
+        let pointer = Arc::into_raw(Arc::new(data));
+        Self {
+            pointer: AtomicPtr::new(pointer as *mut T),
+        }
+    }
+
+    /// # Safety
+    /// The caller must ensure that the pointer is not used elsewhere before ACTUALLY dropping it.
+    pub fn swap(&self, data: Option<Arc<T>>) -> Option<Arc<T>> {
+        let new_pointer = data.map(Arc::into_raw).unwrap_or(core::ptr::null());
+        let old_pointer = self.pointer.swap(new_pointer as *mut _, Ordering::AcqRel);
+        if old_pointer.is_null() {
+            None
+        } else {
+            Some(unsafe { Arc::from_raw(old_pointer) })
+        }
+    }
+
+    pub fn borrow(&self) -> BorrowedArc<T> {
+        BorrowedArc::from_raw(self.pointer.load(Ordering::Relaxed))
+    }
+}
+
+impl<T> Debug for ArcSwap<T>
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        write!(f, "ArcSwap {{ {:?} }}", self.borrow().as_ref())
+    }
+}