Browse Source

[partial] feat: add sync stuff

feat: add sync primitives like CondVar, Mutex, Spin and Semaphore to Rust

change: make Inode a trait again
change: replace some of the Spins with Mutexes or Semaphores
change: make kernel stacks 512KB in size
change: save all file references as Dentry
change: enable interrupts for ahci devices

style: update clang-format and rustfmt
greatbridf cách đây 1 tháng
mục cha
commit
ae698248ee
53 tập tin đã thay đổi với 2196 bổ sung và 1475 xóa
  1. 1 0
      .clang-format
  2. 3 3
      .rustfmt.toml
  3. 1 1
      include/kernel/async/lock.hpp
  4. 1 1
      include/kernel/mem/mm_list.hpp
  5. 15 15
      include/kernel/mem/vm_area.hpp
  6. 2 1
      include/kernel/process.hpp
  7. 1 1
      include/kernel/task/thread.hpp
  8. 13 24
      include/kernel/vfs.hpp
  9. 2 2
      include/kernel/vfs/file.hpp
  10. 4 4
      src/boot.s
  11. 5 4
      src/driver/ahci/command.rs
  12. 35 38
      src/driver/ahci/control.rs
  13. 33 33
      src/driver/ahci/defs.rs
  14. 102 52
      src/driver/ahci/mod.rs
  15. 235 52
      src/driver/ahci/port.rs
  16. 23 11
      src/driver/e1000e.rs
  17. 131 136
      src/fs/fat32.rs
  18. 130 132
      src/fs/procfs.rs
  19. 188 228
      src/fs/tmpfs.rs
  20. 5 3
      src/kernel/async/lock.cc
  21. 2 1
      src/kernel/async/waitlist.cc
  22. 12 10
      src/kernel/block.rs
  23. 35 5
      src/kernel/console.rs
  24. 3 1
      src/kernel/interrupt.cpp
  25. 12 20
      src/kernel/interrupt.rs
  26. 10 14
      src/kernel/mem/mm_list.cc
  27. 8 4
      src/kernel/mem/paging.cc
  28. 20 10
      src/kernel/mem/slab.cc
  29. 32 6
      src/kernel/process.cpp
  30. 1 1
      src/kernel/signal.cpp
  31. 1 4
      src/kernel/syscall.cpp
  32. 13 19
      src/kernel/syscall/fileops.cc
  33. 30 3
      src/kernel/task/thread.cc
  34. 3 0
      src/kernel/tty.cpp
  35. 56 58
      src/kernel/vfs.cpp
  36. 14 59
      src/kernel/vfs/dentry.rs
  37. 29 17
      src/kernel/vfs/dentry/dcache.rs
  38. 51 72
      src/kernel/vfs/ffi.rs
  39. 16 22
      src/kernel/vfs/filearr.cc
  40. 192 191
      src/kernel/vfs/inode.rs
  41. 13 10
      src/kernel/vfs/mount.rs
  42. 2 3
      src/kernel/vfs/vfs.rs
  43. 1 1
      src/lib.rs
  44. 12 11
      src/net/netdev.rs
  45. 34 145
      src/prelude.rs
  46. 31 31
      src/rcu.rs
  47. 82 3
      src/sync.rs
  48. 113 0
      src/sync/condvar.rs
  49. 136 0
      src/sync/lock.rs
  50. 157 0
      src/sync/semaphore.rs
  51. 105 0
      src/sync/spin.rs
  52. 33 0
      src/sync/strategy.rs
  53. 7 13
      src/types/elf.cpp

+ 1 - 0
.clang-format

@@ -6,6 +6,7 @@ AllowShortFunctionsOnASingleLine: Inline
 AllowShortIfStatementsOnASingleLine: Never
 AllowShortLoopsOnASingleLine: 'false'
 BreakConstructorInitializers: BeforeComma
+ColumnLimit: '100'
 FixNamespaceComments: 'true'
 IncludeBlocks: Regroup
 IndentWidth: '4'

+ 3 - 3
.rustfmt.toml

@@ -1,4 +1,4 @@
-max_width = 80
+max_width = 100
 hard_tabs = false
 tab_spaces = 4
 newline_style = "Auto"
@@ -10,8 +10,8 @@ struct_lit_width = 18
 struct_variant_width = 35
 array_width = 60
 chain_width = 60
-single_line_if_else_max_width = 50
-single_line_let_else_max_width = 50
+single_line_if_else_max_width = 60
+single_line_let_else_max_width = 60
 wrap_comments = false
 format_code_in_doc_comments = false
 doc_comment_code_block_width = 100

+ 1 - 1
include/kernel/async/lock.hpp

@@ -8,7 +8,7 @@ namespace kernel::async {
 
 using spinlock_t = unsigned long volatile;
 using lock_context_t = unsigned long;
-using preempt_count_t = std::size_t;
+using preempt_count_t = ssize_t;
 
 void preempt_disable();
 void preempt_enable();

+ 1 - 1
include/kernel/mem/mm_list.hpp

@@ -46,7 +46,7 @@ class mm_list {
 
         unsigned long flags;
 
-        const fs::rust_inode_handle* file_inode;
+        fs::dentry_pointer file;
         // MUSE BE aligned to 4kb boundary
         std::size_t file_offset;
     };

+ 15 - 15
include/kernel/mem/vm_area.hpp

@@ -20,41 +20,41 @@ struct vm_area {
 
     unsigned long flags;
 
-    const fs::rust_inode_handle* mapped_file;
+    fs::dentry_pointer mapped_file;
     std::size_t file_offset;
 
     constexpr bool is_avail(uintptr_t ostart, uintptr_t oend) const noexcept {
         return (ostart >= end || oend <= start);
     }
 
-    constexpr bool operator<(const vm_area& rhs) const noexcept {
-        return end <= rhs.start;
-    }
-    constexpr bool operator<(uintptr_t rhs) const noexcept {
-        return end <= rhs;
-    }
-    friend constexpr bool operator<(uintptr_t lhs,
-                                    const vm_area& rhs) noexcept {
+    constexpr bool operator<(const vm_area& rhs) const noexcept { return end <= rhs.start; }
+    constexpr bool operator<(uintptr_t rhs) const noexcept { return end <= rhs; }
+    friend constexpr bool operator<(uintptr_t lhs, const vm_area& rhs) noexcept {
         return lhs < rhs.start;
     }
 
     constexpr vm_area(uintptr_t start, unsigned long flags, uintptr_t end,
-                      const fs::rust_inode_handle* mapped_file = nullptr,
-                      std::size_t offset = 0)
+                      fs::dentry_pointer mapped_file = nullptr, std::size_t offset = 0)
         : start{start}
         , end{end}
         , flags{flags}
-        , mapped_file{mapped_file}
+        , mapped_file{std::move(mapped_file)}
         , file_offset{offset} {}
 
     constexpr vm_area(uintptr_t start, unsigned long flags,
-                      const fs::rust_inode_handle* mapped_file = nullptr,
-                      std::size_t offset = 0)
+                      fs::dentry_pointer mapped_file = nullptr, std::size_t offset = 0)
         : start{start}
         , end{start}
         , flags{flags}
-        , mapped_file{mapped_file}
+        , mapped_file{std::move(mapped_file)}
         , file_offset{offset} {}
+
+    inline vm_area(const vm_area& other)
+        : start{other.start}
+        , end{other.end}
+        , flags{other.flags}
+        , mapped_file{d_get(other.mapped_file)}
+        , file_offset{other.file_offset} {}
 };
 
 } // namespace kernel::mem

+ 2 - 1
include/kernel/process.hpp

@@ -164,7 +164,8 @@ class proclist final {
 
 void NORETURN init_scheduler(kernel::mem::paging::pfn_t kernel_stack_pfn);
 /// @return true if returned normally, false if being interrupted
-bool schedule(void);
+bool schedule_now(void);
+bool schedule_now_preempt_disabled();
 void NORETURN schedule_noreturn(void);
 
 void NORETURN freeze(void);

+ 1 - 1
include/kernel/task/thread.hpp

@@ -61,7 +61,7 @@ struct thread {
     int set_thread_area(user::user_desc* ptr);
     int load_thread_area32() const;
 
-    void set_attr(thd_attr_t new_attr);
+    void set_attr(thd_attr_t new_attr, bool forced = false);
 
     void send_signal(signal_list::signo_type signal);
 

+ 13 - 24
include/kernel/vfs.hpp

@@ -61,41 +61,30 @@ extern "C" int fs_mknod(struct dentry* at, mode_t mode, dev_t sn);
 extern "C" int fs_unlink(struct dentry* at);
 extern "C" int fs_symlink(struct dentry* at, const char* target);
 
-extern "C" int fs_statx(const struct rust_inode_handle* inode,
-                        struct statx* stat, unsigned int mask);
-extern "C" int fs_readlink(const struct rust_inode_handle* inode, char* buf,
-                           size_t buf_size);
-extern "C" int fs_truncate(const struct rust_inode_handle* file, size_t size);
-extern "C" size_t fs_read(const struct rust_inode_handle* file, char* buf,
-                          size_t buf_size, size_t offset, size_t n);
-extern "C" size_t fs_write(const struct rust_inode_handle* file,
-                           const char* buf, size_t offset, size_t n);
+extern "C" int fs_statx(struct dentry* file, struct statx* stat, unsigned int mask);
+extern "C" int fs_readlink(struct dentry* file, char* buf, size_t buf_size);
+extern "C" int fs_truncate(struct dentry* file, size_t size);
+extern "C" size_t fs_read(struct dentry* file, char* buf, size_t buf_size, size_t offset, size_t n);
+extern "C" size_t fs_write(struct dentry* file, const char* buf, size_t offset, size_t n);
 
 using readdir_callback_fn = std::function<int(const char*, size_t, ino_t)>;
 
-extern "C" ssize_t fs_readdir(const struct rust_inode_handle* file,
-                              size_t offset,
+extern "C" ssize_t fs_readdir(struct dentry* dir, size_t offset,
                               const readdir_callback_fn* callback);
 
-extern "C" int fs_mount(dentry* mnt, const char* source,
-                        const char* mount_point, const char* fstype,
-                        unsigned long flags, const void* data);
+extern "C" int fs_mount(dentry* mnt, const char* source, const char* mount_point,
+                        const char* fstype, unsigned long flags, const void* data);
 
-extern "C" mode_t r_get_inode_mode(struct rust_inode_handle* inode);
-extern "C" size_t r_get_inode_size(struct rust_inode_handle* inode);
+extern "C" mode_t r_dentry_get_mode(struct dentry* dentry);
+extern "C" size_t r_dentry_get_size(struct dentry* dentry);
 extern "C" bool r_dentry_is_directory(struct dentry* dentry);
 extern "C" bool r_dentry_is_invalid(struct dentry* dentry);
 
-// borrow from dentry->inode
-extern "C" struct rust_inode_handle* r_dentry_get_inode(struct dentry* dentry);
 extern "C" struct dentry* r_get_root_dentry();
 
-#define current_open(...) \
-    fs::open(current_process->fs_context, current_process->cwd, __VA_ARGS__)
+#define current_open(...) fs::open(current_process->fs_context, current_process->cwd, __VA_ARGS__)
 
-std::pair<dentry_pointer, int> open(const fs_context& context,
-                                    const dentry_pointer& cwd,
-                                    types::string_view path,
-                                    bool follow_symlinks = true);
+std::pair<dentry_pointer, int> open(const fs_context& context, const dentry_pointer& cwd,
+                                    types::string_view path, bool follow_symlinks = true);
 
 } // namespace fs

+ 2 - 2
include/kernel/vfs/file.hpp

@@ -82,9 +82,9 @@ struct file {
 struct regular_file : public virtual file {
     virtual ~regular_file() = default;
     std::size_t cursor{};
-    struct rust_inode_handle* ind{};
+    dentry_pointer dentry;
 
-    regular_file(file_flags flags, size_t cursor, rust_inode_handle* ind);
+    regular_file(file_flags flags, size_t cursor, dentry_pointer dentry);
 
     virtual ssize_t read(char* __user buf, size_t n) override;
     virtual ssize_t do_write(const char* __user buf, size_t n) override;

+ 4 - 4
src/boot.s

@@ -128,8 +128,8 @@ start_32bit:
     # read kimage into memory
 	lea -16(%esp), %esp
     mov $KIMAGE_32K_COUNT, %ecx
-    mov $KERNEL_IMAGE_PADDR, 4(%esp) # destination address
-	mov $9, (%esp) # LBA
+    movl $KERNEL_IMAGE_PADDR, 4(%esp) # destination address
+	movl $9, (%esp) # LBA
 
 .Lread_kimage:
 	mov (%esp), %edi
@@ -139,8 +139,8 @@ start_32bit:
     call read_disk
 	mov %ebx, %ecx
 
-    add $0x8000, 4(%esp)
-	add $64, (%esp)
+    addl $0x8000, 4(%esp)
+	addl $64, (%esp)
 
     loop .Lread_kimage
 

+ 5 - 4
src/driver/ahci/command.rs

@@ -16,19 +16,20 @@ pub trait Command {
 }
 
 pub struct IdentifyCommand {
-    pages: [Page; 1],
+    page: Page,
 }
 
 impl IdentifyCommand {
     pub fn new() -> Self {
-        let page = Page::alloc_one();
-        Self { pages: [page] }
+        Self {
+            page: Page::alloc_one(),
+        }
     }
 }
 
 impl Command for IdentifyCommand {
     fn pages(&self) -> &[Page] {
-        &self.pages
+        core::slice::from_ref(&self.page)
     }
 
     fn lba(&self) -> u64 {

+ 35 - 38
src/driver/ahci/control.rs

@@ -1,9 +1,6 @@
-use crate::{
-    kernel::mem::phys::{NoCachePP, PhysPtr},
-    prelude::*,
-};
+use crate::kernel::mem::phys::{NoCachePP, PhysPtr};
 
-use super::{vread, vwrite, GHC_IE};
+use super::{BitsIterator, GHC_IE};
 
 /// An `AdapterControl` is an HBA device Global Host Control block
 ///
@@ -12,7 +9,7 @@ use super::{vread, vwrite, GHC_IE};
 /// All reads and writes to this struct is volatile
 ///
 #[repr(C)]
-pub struct AdapterControl {
+struct AdapterControlData {
     capabilities: u32,
     global_host_control: u32,
     interrupt_status: u32,
@@ -29,50 +26,50 @@ pub struct AdapterControl {
     vendor: [u8; 96],
 }
 
+const CONTROL_CAP: usize = 0;
+const CONTROL_GHC: usize = 1;
+const CONTROL_IS: usize = 2;
+const CONTROL_PI: usize = 3;
+
+pub struct AdapterControl {
+    inner: *mut u32,
+}
+
+/// # Safety
+/// At the same time, exactly one instance of this struct may exist.
+unsafe impl Send for AdapterControl {}
+
 impl AdapterControl {
-    pub fn new<'lt>(addr: usize) -> &'lt mut Self {
-        NoCachePP::new(addr).as_mut()
+    pub fn new(addr: usize) -> Self {
+        Self {
+            inner: NoCachePP::new(addr).as_ptr(),
+        }
     }
 }
 
 impl AdapterControl {
-    pub fn enable_interrupts(&mut self) {
-        let ghc = vread(&self.global_host_control);
-        vwrite(&mut self.global_host_control, ghc | GHC_IE);
+    fn read(&self, off: usize) -> u32 {
+        unsafe { self.inner.offset(off as isize).read_volatile() }
     }
 
-    pub fn implemented_ports(&self) -> ImplementedPortsIter {
-        ImplementedPortsIter::new(vread(&self.ports_implemented))
+    fn write(&self, off: usize, value: u32) {
+        unsafe { self.inner.offset(off as isize).write_volatile(value) }
     }
-}
 
-pub struct ImplementedPortsIter {
-    ports: u32,
-    n: u32,
-}
-
-impl ImplementedPortsIter {
-    fn new(ports: u32) -> Self {
-        Self { ports, n: 0 }
+    pub fn enable_interrupts(&self) {
+        let ghc = self.read(CONTROL_GHC);
+        self.write(CONTROL_GHC, ghc | GHC_IE);
     }
-}
-
-impl Iterator for ImplementedPortsIter {
-    type Item = u32;
 
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.n == 32 {
-            return None;
-        }
+    pub fn implemented_ports(&self) -> BitsIterator {
+        BitsIterator::new(self.read(CONTROL_PI))
+    }
 
-        let have: bool = self.ports & 1 != 0;
-        self.ports >>= 1;
-        self.n += 1;
+    pub fn pending_interrupts(&self) -> BitsIterator {
+        BitsIterator::new(self.read(CONTROL_IS))
+    }
 
-        if have {
-            Some(self.n - 1)
-        } else {
-            self.next()
-        }
+    pub fn clear_interrupt(&self, no: u32) {
+        self.write(CONTROL_IS, 1 << no)
     }
 }

+ 33 - 33
src/driver/ahci/defs.rs

@@ -17,6 +17,33 @@ pub const PORT_CMD_FRE: u32 = 0x00000010;
 pub const PORT_CMD_FR: u32 = 0x00004000;
 pub const PORT_CMD_CR: u32 = 0x00008000;
 
+pub const PORT_IE_DHRE: u32 = 0x00000001;
+pub const PORT_IE_UFE: u32 = 0x00000010;
+pub const PORT_IE_INFE: u32 = 0x04000000;
+pub const PORT_IE_IFE: u32 = 0x08000000;
+pub const PORT_IE_HBDE: u32 = 0x10000000;
+pub const PORT_IE_IBFE: u32 = 0x20000000;
+pub const PORT_IE_TFEE: u32 = 0x40000000;
+
+pub const PORT_IE_DEFAULT: u32 = PORT_IE_DHRE
+    | PORT_IE_UFE
+    | PORT_IE_INFE
+    | PORT_IE_IFE
+    | PORT_IE_HBDE
+    | PORT_IE_IBFE
+    | PORT_IE_TFEE;
+
+pub const PORT_IS_DHRS: u32 = 0x00000001;
+pub const PORT_IS_UFS: u32 = 0x00000010;
+pub const PORT_IS_INFS: u32 = 0x04000000;
+pub const PORT_IS_IFS: u32 = 0x08000000;
+pub const PORT_IS_HBDS: u32 = 0x10000000;
+pub const PORT_IS_IBFS: u32 = 0x20000000;
+pub const PORT_IS_TFES: u32 = 0x40000000;
+
+pub const PORT_IS_ERROR: u32 =
+    PORT_IS_UFS | PORT_IS_INFS | PORT_IS_IFS | PORT_IS_HBDS | PORT_IS_IBFS;
+
 /// A `CommandHeader` is used to send commands to the HBA device
 ///
 /// # Access
@@ -29,47 +56,20 @@ pub struct CommandHeader {
     // [5]: ATAPI
     // [6]: Write
     // [7]: Prefetchable
-    first: u8,
+    pub first: u8,
 
     // [0]: Reset
     // [1]: BIST
     // [2]: Clear busy upon ok
     // [3]: Reserved
     // [4:7]: Port multiplier
-    second: u8,
-
-    prdt_length: u16,
-    bytes_transferred: u32,
-    command_table_base: u64,
-
-    _reserved: [u32; 4],
-}
-
-impl CommandHeader {
-    pub fn clear(&mut self) {
-        self.first = 0;
-        self.second = 0;
-        self.prdt_length = 0;
-        self.bytes_transferred = 0;
-        self.command_table_base = 0;
-        self._reserved = [0; 4];
-    }
-
-    pub fn setup(&mut self, cmdtable_base: u64, prdtlen: u16, write: bool) {
-        self.first = 0x05; // FIS type
-
-        if write {
-            self.first |= 0x40;
-        }
+    pub second: u8,
 
-        self.second = 0x04; // Clear busy upon ok
+    pub prdt_length: u16,
+    pub bytes_transferred: u32,
+    pub command_table_base: u64,
 
-        self.prdt_length = prdtlen;
-        self.bytes_transferred = 0;
-        self.command_table_base = cmdtable_base;
-
-        self._reserved = [0; 4];
-    }
+    pub _reserved: [u32; 4],
 }
 
 pub enum FisType {

+ 102 - 52
src/driver/ahci/mod.rs

@@ -1,5 +1,8 @@
 use crate::{
-    kernel::block::{make_device, BlockDevice},
+    kernel::{
+        block::{make_device, BlockDevice},
+        interrupt::register_irq_handler,
+    },
     prelude::*,
 };
 
@@ -17,100 +20,147 @@ mod control;
 mod defs;
 mod port;
 
-fn vread<T: Sized + Copy>(refval: &T) -> T {
-    unsafe { core::ptr::read_volatile(refval) }
+pub struct BitsIterator {
+    data: u32,
+    n: u32,
 }
 
-fn vwrite<T: Sized + Copy>(refval: &mut T, val: T) {
-    unsafe { core::ptr::write_volatile(refval, val) }
+impl BitsIterator {
+    fn new(data: u32) -> Self {
+        Self { data, n: 0 }
+    }
 }
 
-fn spinwait_clear(refval: &u32, mask: u32) -> KResult<()> {
-    const SPINWAIT_MAX: usize = 1000;
+impl Iterator for BitsIterator {
+    type Item = u32;
 
-    let mut spins = 0;
-    while vread(refval) & mask != 0 {
-        if spins == SPINWAIT_MAX {
-            return Err(EIO);
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.n == 32 {
+            return None;
         }
 
-        spins += 1;
-    }
-
-    Ok(())
-}
-
-fn spinwait_set(refval: &u32, mask: u32) -> KResult<()> {
-    const SPINWAIT_MAX: usize = 1000;
+        let have: bool = self.data & 1 != 0;
+        self.data >>= 1;
+        self.n += 1;
 
-    let mut spins = 0;
-    while vread(refval) & mask != mask {
-        if spins == SPINWAIT_MAX {
-            return Err(EIO);
+        if have {
+            Some(self.n - 1)
+        } else {
+            self.next()
         }
-
-        spins += 1;
     }
+}
+
+fn vread<T: Sized + Copy>(refval: *const T) -> T {
+    unsafe { refval.read_volatile() }
+}
 
-    Ok(())
+fn vwrite<T: Sized + Copy>(refval: *mut T, val: T) {
+    unsafe { refval.write_volatile(val) }
 }
 
-struct Device<'lt, 'port> {
+struct Device {
     control_base: usize,
-    control: &'lt mut AdapterControl,
+    control: AdapterControl,
     // TODO: impl Drop to free pci device
     pcidev: *mut pci_device,
-    ports: Vec<Option<Arc<Mutex<AdapterPort<'port>>>>>,
+    /// # Lock
+    /// Might be accessed from irq handler, use with `lock_irq()`
+    ports: Spin<[Option<Arc<AdapterPort>>; 32]>,
 }
 
-impl<'lt, 'port: 'static> Device<'lt, 'port> {
-    fn probe_ports(&mut self) -> KResult<()> {
-        for nport in self.control.implemented_ports() {
-            let mut port = AdapterPort::<'port>::new(self.control_base, nport);
+/// # Safety
+/// `pcidev` is never accessed from Rust code
+/// TODO!!!: place *mut pci_device in a safe wrapper
+unsafe impl Send for Device {}
+unsafe impl Sync for Device {}
 
+impl Device {
+    fn probe_ports(&self) -> KResult<()> {
+        for nport in self.control.implemented_ports() {
+            let port = Arc::new(AdapterPort::new(self.control_base, nport));
             if !port.status_ok() {
                 continue;
             }
 
-            port.init()?;
-
-            let port = Arc::new(Mutex::new(port));
+            self.ports.lock_irq()[nport as usize] = Some(port.clone());
+            if let Err(e) = (|| -> KResult<()> {
+                port.init()?;
 
-            self.ports[nport as usize] = Some(port.clone());
+                let port = BlockDevice::register_disk(
+                    make_device(8, nport * 16),
+                    2147483647, // TODO: get size from device
+                    port,
+                )?;
 
-            let port = BlockDevice::register_disk(
-                make_device(8, nport * 16),
-                2147483647, // TODO: get size from device
-                port,
-            )?;
+                port.partprobe()?;
 
-            port.partprobe()?;
+                Ok(())
+            })() {
+                self.ports.lock_irq()[nport as usize] = None;
+                println_warn!("probe port {nport} failed with {e}");
+            }
         }
 
         Ok(())
     }
+
+    fn handle_interrupt(&self) {
+        println_debug!("ahci interrupt fired");
+
+        // Safety
+        // `self.ports` is accessed inside irq handler
+        let ports = self.ports.lock();
+        for nport in self.control.pending_interrupts() {
+            println_debug!("processing port {nport}");
+            if let None = ports[nport as usize] {
+                println_warn!("port {nport} not found");
+                continue;
+            }
+
+            let port = ports[nport as usize].as_ref().unwrap();
+            let status = vread(port.interrupt_status());
+
+            if status & PORT_IS_ERROR != 0 {
+                println_warn!("port {nport} SATA error");
+                continue;
+            }
+
+            debug_assert!(status & PORT_IS_DHRS != 0);
+            vwrite(port.interrupt_status(), PORT_IS_DHRS);
+
+            self.control.clear_interrupt(nport);
+            println_debug!("clear port {nport} interrupt flags");
+
+            port.handle_interrupt();
+        }
+    }
 }
 
-impl<'lt: 'static, 'port: 'static> Device<'lt, 'port> {
-    pub fn new(pcidev: *mut pci_device) -> KResult<Self> {
+impl Device {
+    pub fn new(pcidev: *mut pci_device) -> KResult<Arc<Self>> {
         let base = unsafe { *(*pcidev).header_type0() }.bars[PCI_REG_ABAR];
+        let irqno = unsafe { *(*pcidev).header_type0() }.interrupt_line;
 
         // use MMIO
         if base & 0xf != 0 {
             return Err(EIO);
         }
 
-        let mut ports = Vec::with_capacity(32);
-        ports.resize_with(32, || None);
-
-        let mut device = Device {
+        let device = Arc::new(Device {
             control_base: base as usize,
             control: AdapterControl::new(base as usize),
             pcidev,
-            ports,
-        };
+            ports: Spin::new([const { None }; 32]),
+        });
 
         device.control.enable_interrupts();
+
+        let device_irq = device.clone();
+        register_irq_handler(irqno as i32, move || {
+            device_irq.handle_interrupt()
+        })?;
+
         device.probe_ports()?;
 
         Ok(device)
@@ -123,7 +173,7 @@ unsafe extern "C" fn probe_device(pcidev: *mut pci_device) -> i32 {
             // TODO!!!: save device to pci_device
             Box::leak(Box::new(device));
             0
-        },
+        }
         Err(e) => -(e as i32),
     }
 }

+ 235 - 52
src/driver/ahci/port.rs

@@ -1,4 +1,5 @@
-use bindings::EINVAL;
+use alloc::collections::vec_deque::VecDeque;
+use bindings::{EINVAL, EIO};
 
 use crate::prelude::*;
 
@@ -6,14 +7,29 @@ use crate::kernel::block::{BlockDeviceRequest, BlockRequestQueue};
 use crate::kernel::mem::paging::Page;
 
 use crate::kernel::mem::phys::{NoCachePP, PhysPtr};
+use crate::sync::condvar::CondVar;
 
 use super::command::{Command, IdentifyCommand, ReadLBACommand};
 use super::{
-    spinwait_clear, vread, vwrite, CommandHeader, PRDTEntry, ReceivedFis,
-    ATA_DEV_BSY, ATA_DEV_DRQ, FISH2D, PORT_CMD_CR, PORT_CMD_FR, PORT_CMD_FRE,
-    PORT_CMD_ST,
+    vread, vwrite, CommandHeader, PRDTEntry, FISH2D, PORT_CMD_CR, PORT_CMD_FR,
+    PORT_CMD_FRE, PORT_CMD_ST, PORT_IE_DEFAULT,
 };
 
+fn spinwait_clear(refval: *const u32, mask: u32) -> KResult<()> {
+    const SPINWAIT_MAX: usize = 1000;
+
+    let mut spins = 0;
+    while vread(refval) & mask != 0 {
+        if spins == SPINWAIT_MAX {
+            return Err(EIO);
+        }
+
+        spins += 1;
+    }
+
+    Ok(())
+}
+
 /// An `AdapterPort` is an HBA device in AHCI mode.
 ///
 /// # Access
@@ -49,67 +65,215 @@ pub struct AdapterPortData {
     vendor: [u32; 4],
 }
 
-pub struct AdapterPort<'lt> {
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+enum SlotState {
+    Idle,
+    Working,
+    Finished,
+    Error,
+}
+
+struct CommandSlotInner {
+    state: SlotState,
+    /// # Usage
+    /// `cmdheader` might be used in irq handler. So in order to wait for
+    /// commands to finish, we should use `lock_irq` on `cmdheader`
+    cmdheader: *mut CommandHeader,
+}
+
+/// # Safety
+/// This is safe because the `cmdheader` is not shared between threads
+unsafe impl Send for CommandSlotInner {}
+
+impl CommandSlotInner {
+    pub fn setup(&mut self, cmdtable_base: u64, prdtlen: u16, write: bool) {
+        let cmdheader = unsafe { self.cmdheader.as_mut().unwrap() };
+        cmdheader.first = 0x05; // FIS type
+
+        if write {
+            cmdheader.first |= 0x40;
+        }
+
+        cmdheader.second = 0x00;
+
+        cmdheader.prdt_length = prdtlen;
+        cmdheader.bytes_transferred = 0;
+        cmdheader.command_table_base = cmdtable_base;
+
+        cmdheader._reserved = [0; 4];
+    }
+}
+
+struct CommandSlot {
+    inner: Spin<CommandSlotInner>,
+    cv: CondVar,
+}
+
+impl CommandSlot {
+    fn new(cmdheader: *mut CommandHeader) -> Self {
+        Self {
+            inner: Spin::new(CommandSlotInner {
+                state: SlotState::Idle,
+                cmdheader,
+            }),
+            cv: CondVar::new(),
+        }
+    }
+}
+
+struct FreeList {
+    free: VecDeque<u32>,
+    working: VecDeque<u32>,
+}
+
+impl FreeList {
+    fn new() -> Self {
+        Self {
+            free: (0..32).collect(),
+            working: VecDeque::new(),
+        }
+    }
+}
+
+pub struct AdapterPort {
     nport: u32,
-    data: &'lt mut AdapterPortData,
+    regs: *mut (),
     page: Page,
-    cmdheaders: &'lt mut [CommandHeader; 32],
-    recv_fis: &'lt mut ReceivedFis,
+    slots: [CommandSlot; 32],
+    free_list: Spin<FreeList>,
+    free_list_cv: CondVar,
 }
 
-impl<'lt> AdapterPort<'lt> {
+/// # Safety
+/// This is safe because the `AdapterPort` can be accessed by only one thread at the same time
+unsafe impl Send for AdapterPort {}
+unsafe impl Sync for AdapterPort {}
+
+impl AdapterPort {
     pub fn new(base: usize, nport: u32) -> Self {
         let page = Page::alloc_one();
+        let cmdheaders_start = page.as_cached().as_ptr::<CommandHeader>();
+
         Self {
             nport,
-            data: NoCachePP::new(base + 0x100 + 0x80 * nport as usize).as_mut(),
-            cmdheaders: page.as_cached().as_mut(),
-            recv_fis: page.as_cached().offset(0x400).as_mut(),
+            regs: NoCachePP::new(base + 0x100 + 0x80 * nport as usize).as_ptr(),
+            slots: core::array::from_fn(|index| {
+                CommandSlot::new(unsafe {
+                    cmdheaders_start.offset(index as isize)
+                })
+            }),
+            free_list: Spin::new(FreeList::new()),
+            free_list_cv: CondVar::new(),
             page,
         }
     }
 }
 
-impl<'lt> AdapterPort<'lt> {
+impl AdapterPort {
+    fn command_list_base(&self) -> *mut u64 {
+        unsafe { self.regs.byte_offset(0x00).cast() }
+    }
+
+    fn fis_base(&self) -> *mut u64 {
+        unsafe { self.regs.byte_offset(0x08).cast() }
+    }
+
+    fn sata_status(&self) -> *mut u32 {
+        unsafe { self.regs.byte_offset(0x28).cast() }
+    }
+
+    fn command_status(&self) -> *mut u32 {
+        unsafe { self.regs.byte_offset(0x18).cast() }
+    }
+
+    fn command_issue(&self) -> *mut u32 {
+        unsafe { self.regs.byte_offset(0x38).cast() }
+    }
+
+    pub fn interrupt_status(&self) -> *mut u32 {
+        unsafe { self.regs.byte_offset(0x10).cast() }
+    }
+
+    pub fn interrupt_enable(&self) -> *mut u32 {
+        unsafe { self.regs.byte_offset(0x14).cast() }
+    }
+
     pub fn status_ok(&self) -> bool {
-        self.data.sata_status & 0xf == 0x3
+        vread(self.sata_status()) & 0xf == 0x3
     }
 
-    fn stop_command(&mut self) -> KResult<()> {
-        let cmd_status = vread(&self.data.command_status);
+    fn get_free_slot(&self) -> usize {
+        let mut free_list = self.free_list.lock_irq();
+
+        loop {
+            match free_list.free.pop_front() {
+                Some(slot) => {
+                    free_list.working.push_back(slot);
+                    break slot as usize;
+                }
+                None => {
+                    self.free_list_cv.wait(&mut free_list, false);
+                }
+            }
+        }
+    }
+
+    fn release_free_slot(&self, slot: u32) {
+        self.free_list.lock().free.push_back(slot);
+        self.free_list_cv.notify_one();
+    }
+
+    pub fn handle_interrupt(&self) {
+        let ci = vread(self.command_issue());
+
+        // no need to use `lock_irq()` inside interrupt handler
+        let mut free_list = self.free_list.lock();
+
+        free_list.working.retain(|&n| {
+            if ci & (1 << n) != 0 {
+                return true;
+            }
+
+            let slot = &self.slots[n as usize];
+
+            println_debug!("slot{n} finished");
+
+            // TODO: check error
+            slot.inner.lock().state = SlotState::Finished;
+            slot.cv.notify_all();
+
+            false
+        });
+    }
+
+    fn stop_command(&self) -> KResult<()> {
         vwrite(
-            &mut self.data.command_status,
-            cmd_status & !(PORT_CMD_ST | PORT_CMD_FRE),
+            self.command_status(),
+            vread(self.command_status()) & !(PORT_CMD_ST | PORT_CMD_FRE),
         );
 
-        spinwait_clear(&self.data.command_status, PORT_CMD_CR | PORT_CMD_FR)
+        spinwait_clear(self.command_status(), PORT_CMD_CR | PORT_CMD_FR)
     }
 
-    fn start_command(&mut self) -> KResult<()> {
-        spinwait_clear(&self.data.command_status, PORT_CMD_CR)?;
+    fn start_command(&self) -> KResult<()> {
+        spinwait_clear(self.command_status(), PORT_CMD_CR)?;
 
-        let cmd_status = vread(&self.data.command_status);
+        let cmd_status = vread(self.command_status());
         vwrite(
-            &mut self.data.command_status,
+            self.command_status(),
             cmd_status | PORT_CMD_ST | PORT_CMD_FRE,
         );
 
         Ok(())
     }
 
-    fn send_command(&mut self, cmd: &impl Command) -> KResult<()> {
-        let pages = cmd.pages();
-
-        // TODO: get an available command slot
-        let cmdslot = 0;
+    /// # Might Sleep
+    /// This function **might sleep**, so call it in a preemptible context
+    fn send_command(&self, cmd: &impl Command) -> KResult<()> {
+        might_sleep!();
 
+        let pages = cmd.pages();
         let cmdtable_page = Page::alloc_one();
-        self.cmdheaders[cmdslot].clear();
-        self.cmdheaders[cmdslot].setup(
-            cmdtable_page.as_phys() as u64,
-            pages.len() as u16,
-            cmd.write(),
-        );
 
         let command_fis: &mut FISH2D = cmdtable_page.as_cached().as_mut();
         command_fis.setup(cmd.cmd(), cmd.lba(), cmd.count());
@@ -121,20 +285,43 @@ impl<'lt> AdapterPort<'lt> {
             prdt[idx].setup(page);
         }
 
-        // clear received fis?
+        let slot_index = self.get_free_slot();
+        let slot_object = &self.slots[slot_index];
+        let mut slot = slot_object.inner.lock_irq();
+
+        slot.setup(
+            cmdtable_page.as_phys() as u64,
+            pages.len() as u16,
+            cmd.write(),
+        );
+        slot.state = SlotState::Working;
+
+        // should we clear received fis here?
+        debug_assert!(vread(self.command_issue()) & (1 << slot_index) == 0);
 
-        // wait until port is not busy
-        spinwait_clear(&self.data.task_file_data, ATA_DEV_BSY | ATA_DEV_DRQ)?;
+        println_debug!("slot{slot_index} working");
+        vwrite(self.command_issue(), 1 << slot_index);
 
-        vwrite(&mut self.data.command_issue, 1 << cmdslot);
-        spinwait_clear(&self.data.command_issue, 1 << cmdslot)?;
+        while slot.state == SlotState::Working {
+            slot_object.cv.wait(&mut slot, false);
+        }
 
-        // TODO: check and wait interrupt
+        let state = slot.state;
+        slot.state = SlotState::Idle;
 
-        Ok(())
+        self.release_free_slot(slot_index as u32);
+
+        drop(slot);
+        println_debug!("slot{slot_index} released");
+
+        match state {
+            SlotState::Finished => Ok(()),
+            SlotState::Error => Err(EIO),
+            _ => panic!("Invalid slot state"),
+        }
     }
 
-    fn identify(&mut self) -> KResult<()> {
+    fn identify(&self) -> KResult<()> {
         let cmd = IdentifyCommand::new();
 
         // TODO: check returned data
@@ -143,17 +330,13 @@ impl<'lt> AdapterPort<'lt> {
         Ok(())
     }
 
-    pub fn init(&mut self) -> KResult<()> {
+    pub fn init(&self) -> KResult<()> {
         self.stop_command()?;
 
-        // TODO: use interrupt
-        // this is the PxIE register, setting bits here will make
-        //      it generate corresponding interrupts in PxIS
-        //
-        // port->interrupt_enable = 1;
+        vwrite(self.interrupt_enable(), PORT_IE_DEFAULT);
 
-        vwrite(&mut self.data.command_list_base, self.page.as_phys() as u64);
-        vwrite(&mut self.data.fis_base, self.page.as_phys() as u64 + 0x400);
+        vwrite(self.command_list_base(), self.page.as_phys() as u64);
+        vwrite(self.fis_base(), self.page.as_phys() as u64 + 0x400);
 
         self.start_command()?;
 
@@ -167,12 +350,12 @@ impl<'lt> AdapterPort<'lt> {
     }
 }
 
-impl<'lt> BlockRequestQueue for AdapterPort<'lt> {
+impl BlockRequestQueue for AdapterPort {
     fn max_request_pages(&self) -> u64 {
         1024
     }
 
-    fn submit(&mut self, req: BlockDeviceRequest) -> KResult<()> {
+    fn submit(&self, req: BlockDeviceRequest) -> KResult<()> {
         // TODO: check disk size limit using newtype
         if req.count > 65535 {
             return Err(EINVAL);

+ 23 - 11
src/driver/e1000e.rs

@@ -1,3 +1,5 @@
+use crate::prelude::*;
+
 use crate::bindings::root::kernel::hw::pci;
 use crate::kernel::interrupt::register_irq_handler;
 use crate::kernel::mem::paging::copy_to_page;
@@ -56,6 +58,23 @@ fn test(val: u32, bit: u32) -> bool {
     (val & bit) == bit
 }
 
+struct PrintableBytes<'a>(&'a [u8]);
+
+impl core::fmt::Debug for PrintableBytes<'_> {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "PrintableBytes {{")?;
+        for chunk in self.0.chunks(16) {
+            for &byte in chunk {
+                // Zero-padded hex — matches the packet dump format this replaces.
+                write!(f, "{byte:02x} ")?;
+            }
+            writeln!(f)?;
+        }
+        write!(f, "}}")?;
+        Ok(())
+    }
+}
+
 impl netdev::Netdev for E1000eDev {
     fn mac(&self) -> netdev::Mac {
         self.mac
@@ -151,17 +170,10 @@ impl netdev::Netdev for E1000eDev {
                 )
             };
 
-            use crate::{dont_check, print, println};
-            dont_check!(println!("==== e1000e: received packet ===="));
-
-            for i in 0..len {
-                if i % 16 == 0 {
-                    dont_check!(println!());
-                }
-                dont_check!(print!("{:02x} ", data[i]));
-            }
-
-            dont_check!(println!("\n\n====  e1000e: end of packet  ===="));
+            println_debug!(
+                "e1000e: received {len} bytes, {:?}",
+                PrintableBytes(data)
+            );
             self.rx_tail = Some(next_tail);
         }
 

+ 131 - 136
src/fs/fat32.rs

@@ -1,4 +1,10 @@
-use alloc::{sync::Arc, vec::Vec};
+use core::sync::atomic::Ordering;
+
+use alloc::{
+    collections::btree_map::BTreeMap,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
 use bindings::{EINVAL, EIO, S_IFDIR, S_IFREG};
 
 use itertools::Itertools;
@@ -10,7 +16,7 @@ use crate::{
         mem::{paging::Page, phys::PhysPtr},
         vfs::{
             dentry::Dentry,
-            inode::{Ino, Inode, InodeCache, InodeOps},
+            inode::{define_struct_inode, Ino, Inode, InodeData},
             mount::{register_filesystem, Mount, MountCreator},
             vfs::Vfs,
             DevId, ReadDirCallback,
@@ -131,19 +137,35 @@ struct Bootsector {
     mbr_signature: u16,
 }
 
+impl_any!(FatFs);
 /// # Lock order
-/// 1. FatFs
 /// 2. FatTable
 /// 3. Inodes
 ///
 struct FatFs {
-    device: Arc<BlockDevice>,
-    icache: Mutex<InodeCache<FatFs>>,
     sectors_per_cluster: u8,
     rootdir_cluster: ClusterNo,
     data_start: u64,
-    fat: Mutex<Vec<ClusterNo>>,
-    volume_label: String,
+    volume_label: [u8; 11],
+
+    device: Arc<BlockDevice>,
+    fat: RwSemaphore<Vec<ClusterNo>>,
+    weak: Weak<FatFs>,
+    icache: BTreeMap<Ino, FatInode>,
+}
+
+impl Vfs for FatFs {
+    fn io_blksize(&self) -> usize {
+        4096
+    }
+
+    fn fs_devid(&self) -> DevId {
+        self.device.devid()
+    }
+
+    fn is_read_only(&self) -> bool {
+        true
+    }
 }
 
 impl FatFs {
@@ -151,8 +173,7 @@ impl FatFs {
         let cluster = cluster - 2;
 
         let rq = BlockDeviceRequest {
-            sector: self.data_start as u64
-                + cluster as u64 * self.sectors_per_cluster as u64,
+            sector: self.data_start as u64 + cluster as u64 * self.sectors_per_cluster as u64,
             count: self.sectors_per_cluster as u64,
             buffer: core::slice::from_ref(buf),
         };
@@ -160,57 +181,34 @@ impl FatFs {
 
         Ok(())
     }
-}
-
-impl InodeCache<FatFs> {
-    fn get_or_alloc(
-        &mut self,
-        ino: Ino,
-        is_directory: bool,
-        size: u64,
-    ) -> KResult<Arc<Inode>> {
-        self.get(ino).map(|inode| Ok(inode)).unwrap_or_else(|| {
-            let nlink;
-            let mut mode = 0o777;
-
-            let ops: Box<dyn InodeOps>;
-
-            if is_directory {
-                nlink = 2;
-                mode |= S_IFDIR;
-                ops = Box::new(DirOps);
-            } else {
-                nlink = 1;
-                mode |= S_IFREG;
-                ops = Box::new(FileOps);
-            }
-
-            let mut inode = self.alloc(ino, ops);
-            let inode_mut = unsafe { Arc::get_mut_unchecked(&mut inode) };
-            let inode_idata = inode_mut.idata.get_mut();
 
-            inode_idata.mode = mode;
-            inode_idata.nlink = nlink;
-            inode_idata.size = size;
-
-            self.submit(&inode)?;
-
-            Ok(inode)
-        })
+    fn get_or_alloc_inode(&self, ino: Ino, is_directory: bool, size: u32) -> Arc<dyn Inode> {
+        self.icache
+            .get(&ino)
+            .cloned()
+            .map(FatInode::unwrap)
+            .unwrap_or_else(|| {
+                if is_directory {
+                    DirInode::new(ino, self.weak.clone(), size)
+                } else {
+                    FileInode::new(ino, self.weak.clone(), size)
+                }
+            })
     }
 }
 
 impl FatFs {
-    pub fn create(device: DevId) -> KResult<(Arc<Self>, Arc<Inode>)> {
+    pub fn create(device: DevId) -> KResult<(Arc<Self>, Arc<dyn Inode>)> {
         let device = BlockDevice::get(device)?;
-        let mut fatfs_arc = Arc::new_cyclic(|weak| Self {
+        let mut fatfs_arc = Arc::new_cyclic(|weak: &Weak<FatFs>| Self {
             device,
-            icache: Mutex::new(InodeCache::new(weak.clone())),
             sectors_per_cluster: 0,
             rootdir_cluster: 0,
             data_start: 0,
-            fat: Mutex::new(Vec::new()),
-            volume_label: String::new(),
+            fat: RwSemaphore::new(Vec::new()),
+            weak: weak.clone(),
+            icache: BTreeMap::new(),
+            volume_label: [0; 11],
         });
 
         let fatfs = unsafe { Arc::get_mut_unchecked(&mut fatfs_arc) };
@@ -221,13 +219,13 @@ impl FatFs {
 
         fatfs.sectors_per_cluster = info.sectors_per_cluster;
         fatfs.rootdir_cluster = info.root_cluster;
-        fatfs.data_start = info.reserved_sectors as u64
-            + info.fat_copies as u64 * info.sectors_per_fat as u64;
+        fatfs.data_start =
+            info.reserved_sectors as u64 + info.fat_copies as u64 * info.sectors_per_fat as u64;
 
         let fat = fatfs.fat.get_mut();
+
         fat.resize(
-            512 * info.sectors_per_fat as usize
-                / core::mem::size_of::<ClusterNo>(),
+            512 * info.sectors_per_fat as usize / core::mem::size_of::<ClusterNo>(),
             0,
         );
 
@@ -242,51 +240,21 @@ impl FatFs {
             return Err(EIO);
         }
 
-        fatfs.volume_label = String::from(
-            str::from_utf8(&info.volume_label)
-                .map_err(|_| EINVAL)?
-                .trim_end_matches(char::from(' ')),
-        );
-
-        let root_dir_cluster_count =
-            ClusterIterator::new(&fat, fatfs.rootdir_cluster).count();
-
-        let root_inode = {
-            let icache = fatfs.icache.get_mut();
-
-            let mut inode =
-                icache.alloc(info.root_cluster as Ino, Box::new(DirOps));
-            let inode_mut = unsafe { Arc::get_mut_unchecked(&mut inode) };
-            let inode_idata = inode_mut.idata.get_mut();
-
-            inode_idata.mode = S_IFDIR | 0o777;
-            inode_idata.nlink = 2;
-            inode_idata.size = root_dir_cluster_count as u64
-                * info.sectors_per_cluster as u64
-                * 512;
+        // Strip only the *trailing* space padding of the 11-byte label; embedded
+        // spaces must be kept (the removed code used trim_end_matches(' ')).
+        let label_end = info.volume_label.iter()
+            .rposition(|&c| c != b' ')
+            .map_or(0, |last| last + 1);
+        fatfs.volume_label[..label_end].copy_from_slice(&info.volume_label[..label_end]);
 
-            icache.submit(&inode)?;
-            inode
-        };
+        let root_dir_cluster_count = ClusterIterator::new(fat, fatfs.rootdir_cluster).count();
+        let root_dir_size = root_dir_cluster_count as u32 * info.sectors_per_cluster as u32 * 512;
+        let root_inode = DirInode::new(info.root_cluster as Ino, fatfs.weak.clone(), root_dir_size);
 
         Ok((fatfs_arc, root_inode))
     }
 }
 
-impl Vfs for FatFs {
-    fn io_blksize(&self) -> usize {
-        4096
-    }
-
-    fn fs_devid(&self) -> DevId {
-        self.device.devid()
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-}
-
 struct ClusterIterator<'fat> {
     fat: &'fat [ClusterNo],
     cur: ClusterNo,
@@ -371,24 +339,47 @@ impl<'fat> Iterator for ClusterIterator<'fat> {
     }
 }
 
-struct FileOps;
-impl InodeOps for FileOps {
-    fn as_any(&self) -> &dyn Any {
-        self
+#[derive(Clone)]
+enum FatInode {
+    File(Arc<FileInode>),
+    Dir(Arc<DirInode>),
+}
+
+impl FatInode {
+    fn unwrap(self) -> Arc<dyn Inode> {
+        match self {
+            FatInode::File(inode) => inode,
+            FatInode::Dir(inode) => inode,
+        }
     }
+}
 
-    fn read(
-        &self,
-        inode: &Inode,
-        buffer: &mut dyn Buffer,
-        offset: usize,
-    ) -> KResult<usize> {
-        let vfs = inode.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<FatFs>().ok_or(EINVAL)?;
-        let fat = vfs.fat.lock();
+define_struct_inode! {
+    struct FileInode;
+}
+
+impl FileInode {
+    fn new(ino: Ino, weak: Weak<FatFs>, size: u32) -> Arc<Self> {
+        let inode = Arc::new(Self {
+            idata: InodeData::new(ino, weak),
+        });
+
+        // Safety: We are initializing the inode
+        inode.nlink.store(1, Ordering::Relaxed);
+        inode.mode.store(S_IFREG | 0o777, Ordering::Relaxed);
+        inode.size.store(size as u64, Ordering::Relaxed);
+
+        inode
+    }
+}
 
-        let iter = ClusterIterator::new(&fat, inode.ino as ClusterNo)
-            .read(vfs, offset);
+impl Inode for FileInode {
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
+        let fat = vfs.fat.lock_shared();
+
+        let iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).read(vfs, offset);
 
         for data in iter {
             if buffer.fill(data?)?.should_stop() {
@@ -400,23 +391,32 @@ impl InodeOps for FileOps {
     }
 }
 
-struct DirOps;
-impl InodeOps for DirOps {
-    fn as_any(&self) -> &dyn Any {
-        self
+define_struct_inode! {
+    struct DirInode;
+}
+
+impl DirInode {
+    fn new(ino: Ino, weak: Weak<FatFs>, size: u32) -> Arc<Self> {
+        let inode = Arc::new(Self {
+            idata: InodeData::new(ino, weak),
+        });
+
+        // Safety: We are initializing the inode
+        inode.nlink.store(2, Ordering::Relaxed);
+        inode.mode.store(S_IFDIR | 0o777, Ordering::Relaxed);
+        inode.size.store(size as u64, Ordering::Relaxed);
+
+        inode
     }
+}
 
-    fn lookup(
-        &self,
-        dir: &Inode,
-        dentry: &Arc<Dentry>,
-    ) -> KResult<Option<Arc<Inode>>> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<FatFs>().ok_or(EINVAL)?;
-        let fat = vfs.fat.lock();
+impl Inode for DirInode {
+    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
+        let fat = vfs.fat.lock_shared();
 
-        let mut entries =
-            ClusterIterator::new(&fat, dir.ino as ClusterNo).dirs(vfs, 0);
+        let mut entries = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).dirs(vfs, 0);
 
         let entry = entries.find_map(|entry| {
             if entry.is_err() {
@@ -438,28 +438,27 @@ impl InodeOps for DirOps {
             Some(Ok(entry)) => {
                 let ino = entry.ino();
 
-                Ok(Some(vfs.icache.lock().get_or_alloc(
+                Ok(Some(vfs.get_or_alloc_inode(
                     ino,
                     entry.is_directory(),
-                    entry.size as u64,
-                )?))
+                    entry.size,
+                )))
             }
         }
     }
 
     fn readdir<'cb, 'r: 'cb>(
         &'r self,
-        dir: &'r Inode,
         offset: usize,
         callback: &ReadDirCallback<'cb>,
     ) -> KResult<usize> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<FatFs>().ok_or(EINVAL)?;
-        let fat = vfs.fat.lock();
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
+        let fat = vfs.fat.lock_shared();
 
         const ENTRY_SIZE: usize = core::mem::size_of::<FatDirectoryEntry>();
         let cluster_iter =
-            ClusterIterator::new(&fat, dir.ino as ClusterNo).dirs(vfs, offset);
+            ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).dirs(vfs, offset);
 
         let mut nread = 0;
         for entry in cluster_iter {
@@ -473,11 +472,7 @@ impl InodeOps for DirOps {
             let ino = entry.ino();
             let name = entry.filename();
 
-            vfs.icache.lock().get_or_alloc(
-                ino,
-                entry.is_directory(),
-                entry.size as u64,
-            )?;
+            vfs.get_or_alloc_inode(ino, entry.is_directory(), entry.size);
 
             if callback(name.as_ref(), ino).is_err() {
                 break;
@@ -507,5 +502,5 @@ impl MountCreator for FatMountCreator {
 }
 
 pub fn init() {
-    register_filesystem("fat32", Box::new(FatMountCreator)).unwrap();
+    register_filesystem("fat32", Arc::new(FatMountCreator)).unwrap();
 }

+ 130 - 132
src/fs/procfs.rs

@@ -1,7 +1,10 @@
-use core::sync::atomic::Ordering;
-
-use alloc::sync::{Arc, Weak};
+use alloc::{
+    collections::btree_map::BTreeMap,
+    sync::{Arc, Weak},
+};
 use bindings::{EACCES, ENOTDIR, S_IFDIR, S_IFREG};
+use core::sync::atomic::Ordering;
+use lazy_static::lazy_static;
 
 use crate::{
     io::Buffer,
@@ -9,13 +12,14 @@ use crate::{
         mem::paging::{Page, PageBuffer},
         vfs::{
             dentry::Dentry,
-            inode::{AtomicIno, Inode, InodeCache, InodeData, InodeOps},
+            inode::{define_struct_inode, AtomicIno, Ino, Inode, InodeData},
             mount::{dump_mounts, register_filesystem, Mount, MountCreator},
             vfs::Vfs,
             DevId, ReadDirCallback,
         },
     },
     prelude::*,
+    sync::Locked,
 };
 
 fn split_len_offset(data: &[u8], len: usize, offset: usize) -> Option<&[u8]> {
@@ -24,8 +28,6 @@ fn split_len_offset(data: &[u8], len: usize, offset: usize) -> Option<&[u8]> {
     real_data.split_at_checked(offset).map(|(_, data)| data)
 }
 
-pub struct ProcFsNode(Arc<Inode>);
-
 pub trait ProcFsFile: Send + Sync {
     fn can_read(&self) -> bool {
         false
@@ -44,21 +46,57 @@ pub trait ProcFsFile: Send + Sync {
     }
 }
 
-struct ProcFsFileOps {
-    file: Box<dyn ProcFsFile>,
+pub enum ProcFsNode {
+    File(Arc<FileInode>),
+    Dir(Arc<DirInode>),
 }
 
-impl InodeOps for ProcFsFileOps {
-    fn as_any(&self) -> &dyn Any {
-        self
+impl ProcFsNode {
+    fn unwrap(&self) -> Arc<dyn Inode> {
+        match self {
+            ProcFsNode::File(inode) => inode.clone(),
+            ProcFsNode::Dir(inode) => inode.clone(),
+        }
     }
 
-    fn read(
-        &self,
-        _: &Inode,
-        buffer: &mut dyn Buffer,
-        offset: usize,
-    ) -> KResult<usize> {
+    fn ino(&self) -> Ino {
+        match self {
+            ProcFsNode::File(inode) => inode.ino,
+            ProcFsNode::Dir(inode) => inode.ino,
+        }
+    }
+}
+
+define_struct_inode! {
+    struct FileInode {
+        file: Box<dyn ProcFsFile>,
+    }
+}
+
+impl FileInode {
+    pub fn new(ino: Ino, vfs: Weak<ProcFs>, file: Box<dyn ProcFsFile>) -> Arc<Self> {
+        let mut mode = S_IFREG;
+        if file.can_read() {
+            mode |= 0o444;
+        }
+        if file.can_write() {
+            mode |= 0o200;
+        }
+
+        let inode = Self {
+            idata: InodeData::new(ino, vfs),
+            file,
+        };
+
+        inode.idata.mode.store(mode, Ordering::Relaxed);
+        inode.idata.nlink.store(1, Ordering::Relaxed);
+
+        Arc::new(inode)
+    }
+}
+
+impl Inode for FileInode {
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         if !self.file.can_read() {
             return Err(EACCES);
         }
@@ -75,47 +113,55 @@ impl InodeOps for ProcFsFileOps {
     }
 }
 
-struct ProcFsDirectory {
-    entries: Mutex<Vec<(Arc<[u8]>, ProcFsNode)>>,
+define_struct_inode! {
+    struct DirInode {
+        entries: Locked<Vec<(Arc<[u8]>, ProcFsNode)>, ()>,
+    }
 }
 
-impl InodeOps for ProcFsDirectory {
-    fn as_any(&self) -> &dyn Any {
-        self
+impl DirInode {
+    pub fn new(ino: Ino, vfs: Weak<ProcFs>) -> Arc<Self> {
+        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
+            addr_of_mut_field!(inode, entries).write(Locked::new(vec![], rwsem));
+            addr_of_mut_field!(inode, mode).write((S_IFDIR | 0o755).into());
+            addr_of_mut_field!(inode, nlink).write(1.into());
+        })
     }
+}
 
-    fn lookup(
-        &self,
-        _: &Inode,
-        dentry: &Arc<Dentry>,
-    ) -> KResult<Option<Arc<Inode>>> {
-        Ok(self.entries.lock().iter().find_map(|(name, node)| {
-            name.as_ref()
-                .eq(dentry.name().as_ref())
-                .then(|| node.0.clone())
-        }))
+impl Inode for DirInode {
+    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
+        let lock = self.rwsem.lock_shared();
+        Ok(self
+            .entries
+            .access(lock.as_ref())
+            .iter()
+            .find_map(|(name, node)| {
+                name.as_ref()
+                    .eq(dentry.name().as_ref())
+                    .then(|| node.unwrap())
+            }))
     }
 
     fn readdir<'cb, 'r: 'cb>(
-        &self,
-        _: &Inode,
+        &'r self,
         offset: usize,
         callback: &ReadDirCallback<'cb>,
     ) -> KResult<usize> {
+        let lock = self.rwsem.lock_shared();
         Ok(self
             .entries
-            .lock()
+            .access(lock.as_ref())
             .iter()
             .skip(offset)
-            .take_while(|(name, ProcFsNode(inode))| {
-                callback(name, inode.ino).is_ok()
-            })
+            .take_while(|(name, node)| callback(name, node.ino()).is_ok())
             .count())
     }
 }
 
+impl_any!(ProcFs);
 pub struct ProcFs {
-    root_node: Arc<Inode>,
+    root_node: Arc<DirInode>,
     next_ino: AtomicIno,
 }
 
@@ -128,27 +174,32 @@ impl Vfs for ProcFs {
         10
     }
 
-    fn as_any(&self) -> &dyn Any {
-        self
+    fn is_read_only(&self) -> bool {
+        false
     }
 }
 
-static mut GLOBAL_PROCFS: Option<Arc<ProcFs>> = None;
-static mut ICACHE: Option<InodeCache<ProcFs>> = None;
+lazy_static! {
+    static ref ICACHE: Spin<BTreeMap<Ino, ProcFsNode>> = Spin::new(BTreeMap::new());
+    static ref GLOBAL_PROCFS: Arc<ProcFs> = {
+        let fs: Arc<ProcFs> = Arc::new_cyclic(|weak: &Weak<ProcFs>| ProcFs {
+            root_node: DirInode::new(0, weak.clone()),
+            next_ino: AtomicIno::new(1),
+        });
 
-fn get_icache() -> &'static InodeCache<ProcFs> {
-    unsafe { ICACHE.as_ref().unwrap() }
+        fs
+    };
 }
 
 struct ProcFsMountCreator;
 
 impl ProcFsMountCreator {
     pub fn get() -> Arc<ProcFs> {
-        unsafe { GLOBAL_PROCFS.as_ref().cloned().unwrap() }
+        GLOBAL_PROCFS.clone()
     }
 
     pub fn get_weak() -> Weak<ProcFs> {
-        unsafe { GLOBAL_PROCFS.as_ref().map(Arc::downgrade).unwrap() }
+        Arc::downgrade(&GLOBAL_PROCFS)
     }
 }
 
@@ -170,7 +221,7 @@ pub fn root() -> ProcFsNode {
     let vfs = ProcFsMountCreator::get();
     let root = vfs.root_node.clone();
 
-    ProcFsNode(root)
+    ProcFsNode::Dir(root)
 }
 
 pub fn creat(
@@ -178,69 +229,47 @@ pub fn creat(
     name: &Arc<[u8]>,
     file: Box<dyn ProcFsFile>,
 ) -> KResult<ProcFsNode> {
-    let mut mode = S_IFREG;
-    if file.can_read() {
-        mode |= 0o444;
-    }
-    if file.can_write() {
-        mode |= 0o200;
-    }
-
-    let dir = parent
-        .0
-        .ops
-        .as_any()
-        .downcast_ref::<ProcFsDirectory>()
-        .ok_or(ENOTDIR)?;
+    let parent = match parent {
+        ProcFsNode::File(_) => return Err(ENOTDIR),
+        ProcFsNode::Dir(parent) => parent,
+    };
 
     let fs = ProcFsMountCreator::get();
-    let ino = fs.next_ino.fetch_add(1, Ordering::SeqCst);
-
-    let inode = get_icache().alloc(ino, Box::new(ProcFsFileOps { file }));
+    let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed);
 
-    inode.idata.lock().mode = mode;
-    inode.idata.lock().nlink = 1;
+    let inode = FileInode::new(ino, Arc::downgrade(&fs), file);
 
-    dir.entries
-        .lock()
-        .push((name.clone(), ProcFsNode(inode.clone())));
+    {
+        let mut lock = parent.idata.rwsem.lock();
+        parent
+            .entries
+            .access_mut(lock.as_mut())
+            .push((name.clone(), ProcFsNode::File(inode.clone())));
+    }
 
-    Ok(ProcFsNode(inode))
+    Ok(ProcFsNode::File(inode))
 }
 
 pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult<ProcFsNode> {
-    let dir = parent
-        .0
-        .ops
-        .as_any()
-        .downcast_ref::<ProcFsDirectory>()
-        .ok_or(ENOTDIR)?;
-
-    let ino = ProcFsMountCreator::get()
-        .next_ino
-        .fetch_add(1, Ordering::SeqCst);
-
-    let inode = get_icache().alloc(
-        ino,
-        Box::new(ProcFsDirectory {
-            entries: Mutex::new(vec![]),
-        }),
-    );
+    let parent = match parent {
+        ProcFsNode::File(_) => return Err(ENOTDIR),
+        ProcFsNode::Dir(parent) => parent,
+    };
 
-    {
-        let mut idata = inode.idata.lock();
-        idata.nlink = 2;
-        idata.mode = S_IFDIR | 0o755;
-    }
+    let fs = ProcFsMountCreator::get();
+    let ino = fs.next_ino.fetch_add(1, Ordering::Relaxed);
+
+    let inode = DirInode::new(ino, Arc::downgrade(&fs));
 
-    dir.entries
-        .lock()
-        .push((Arc::from(name), ProcFsNode(inode.clone())));
+    parent
+        .entries
+        // Must lock the *parent*'s rwsem (cf. creat above), not the new child's.
+        .access_mut(parent.idata.rwsem.lock().as_mut())
 
-    Ok(ProcFsNode(inode))
+    Ok(ProcFsNode::Dir(inode))
 }
 
-struct DumpMountsFile {}
+struct DumpMountsFile;
 impl ProcFsFile for DumpMountsFile {
     fn can_read(&self) -> bool {
         true
@@ -254,43 +283,12 @@ impl ProcFsFile for DumpMountsFile {
 }
 
 pub fn init() {
-    let dir = ProcFsDirectory {
-        entries: Mutex::new(vec![]),
-    };
-
-    let fs: Arc<ProcFs> = Arc::new_cyclic(|weak: &Weak<ProcFs>| {
-        let root_node = Arc::new(Inode {
-            ino: 0,
-            vfs: weak.clone(),
-            idata: Mutex::new(InodeData::default()),
-            ops: Box::new(dir),
-        });
-
-        ProcFs {
-            root_node,
-            next_ino: AtomicIno::new(1),
-        }
-    });
-
-    {
-        let mut indata = fs.root_node.idata.lock();
-        indata.mode = S_IFDIR | 0o755;
-        indata.nlink = 1;
-    };
-
-    unsafe {
-        GLOBAL_PROCFS = Some(fs);
-        ICACHE = Some(InodeCache::new(ProcFsMountCreator::get_weak()));
-    };
-
-    register_filesystem("procfs", Box::new(ProcFsMountCreator)).unwrap();
-
-    let root = root();
+    register_filesystem("procfs", Arc::new(ProcFsMountCreator)).unwrap();
 
     creat(
-        &root,
+        &root(),
         &Arc::from(b"mounts".as_slice()),
-        Box::new(DumpMountsFile {}),
+        Box::new(DumpMountsFile),
     )
     .unwrap();
 }

+ 188 - 228
src/fs/tmpfs.rs

@@ -1,373 +1,333 @@
+use alloc::sync::{Arc, Weak};
+use bindings::{EINVAL, EIO, EISDIR, S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG};
 use core::sync::atomic::Ordering;
 
 use crate::{
     io::Buffer,
     kernel::vfs::{
-        dentry::Dentry,
-        inode::{AtomicIno, Ino, Inode, InodeCache, InodeOps, Mode},
+        dentry::{dcache, Dentry},
+        inode::{define_struct_inode, AtomicIno, Ino, Inode, Mode},
         mount::{register_filesystem, Mount, MountCreator, MS_RDONLY},
         s_isblk, s_ischr,
         vfs::Vfs,
         DevId, ReadDirCallback,
     },
     prelude::*,
+    sync::Locked,
 };
 
-use alloc::sync::Arc;
-
-use bindings::{
-    EINVAL, EIO, EISDIR, EROFS, S_IFBLK, S_IFCHR, S_IFDIR, S_IFLNK, S_IFREG,
-};
-
-struct FileOps {
-    data: Mutex<Vec<u8>>,
+fn acquire(vfs: &Weak<dyn Vfs>) -> KResult<Arc<dyn Vfs>> {
+    vfs.upgrade().ok_or(EIO)
 }
 
-struct NodeOps {
-    devid: DevId,
+fn astmp(vfs: &Arc<dyn Vfs>) -> &TmpFs {
+    vfs.as_any()
+        .downcast_ref::<TmpFs>()
+        .expect("corrupted tmpfs data structure")
 }
 
-impl NodeOps {
-    fn new(devid: DevId) -> Self {
-        Self { devid }
+define_struct_inode! {
+    struct NodeInode {
+        devid: DevId,
     }
 }
 
-impl InodeOps for NodeOps {
-    fn as_any(&self) -> &dyn Any {
-        self
+impl NodeInode {
+    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode, devid: DevId) -> Arc<Self> {
+        Self::new_locked(ino, vfs, |inode, _| unsafe {
+            addr_of_mut_field!(inode, devid).write(devid);
+
+            addr_of_mut_field!(inode, mode).write(mode.into());
+            addr_of_mut_field!(inode, nlink).write(1.into());
+        })
     }
+}
 
-    fn devid(&self, _: &Inode) -> KResult<DevId> {
+impl Inode for NodeInode {
+    fn devid(&self) -> KResult<DevId> {
         Ok(self.devid)
     }
 }
 
-struct DirectoryOps {
-    entries: Mutex<Vec<(Arc<[u8]>, Ino)>>,
+define_struct_inode! {
+    struct DirectoryInode {
+        entries: Locked<Vec<(Arc<[u8]>, Ino)>, ()>,
+    }
 }
 
-impl DirectoryOps {
-    fn new() -> Self {
-        Self {
-            entries: Mutex::new(vec![]),
-        }
+impl DirectoryInode {
+    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
+        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
+            addr_of_mut_field!(inode, entries)
+                .write(Locked::new(vec![(Arc::from(b".".as_slice()), ino)], rwsem));
+
+            addr_of_mut_field!(inode, size).write(1.into());
+            addr_of_mut_field!(inode, mode).write((S_IFDIR | (mode & 0o777)).into());
+            addr_of_mut_field!(inode, nlink).write(1.into()); // link from `.` to itself
+        })
     }
 
-    /// Locks the `inode.idata`
-    fn link(&self, dir: &Inode, file: &Inode, name: Arc<[u8]>) -> KResult<()> {
-        dir.idata.lock().size += 1;
-        self.entries.lock().push((name, file.ino));
+    fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: &mut ()) {
+        // Safety: Only `unlink` will do something based on `nlink` count
+        //         No need to synchronize here
+        file.nlink.fetch_add(1, Ordering::Relaxed);
 
-        file.idata.lock().nlink += 1;
+        // Safety: `rwsem` has done the synchronization
+        self.size.fetch_add(1, Ordering::Relaxed);
 
-        Ok(())
+        self.entries.access_mut(dlock).push((name, file.ino));
     }
 }
 
-impl InodeOps for DirectoryOps {
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-
+impl Inode for DirectoryInode {
     fn readdir<'cb, 'r: 'cb>(
-        &self,
-        _: &Inode,
+        &'r self,
         offset: usize,
         callback: &ReadDirCallback<'cb>,
     ) -> KResult<usize> {
+        let lock = self.rwsem.lock_shared();
         Ok(self
             .entries
-            .lock()
+            .access(lock.as_ref())
             .iter()
             .skip(offset)
             .take_while(|(name, ino)| callback(name, *ino).is_ok())
             .count())
     }
 
-    fn creat(&self, dir: &Inode, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<TmpFs>().unwrap();
+    fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
+        let vfs = acquire(&self.vfs)?;
+        let vfs = astmp(&vfs);
 
-        if vfs.readonly {
-            return Err(EROFS);
-        }
+        let mut rwsem = self.rwsem.lock();
 
         let ino = vfs.assign_ino();
-        let file = vfs.icache.lock().alloc_file(ino, mode)?;
+        let file = FileInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(dir, file.as_ref(), at.name().clone())?;
+        self.link(at.name().clone(), file.as_ref(), rwsem.as_mut());
         at.save_reg(file)
     }
 
-    fn mknod(
-        &self,
-        dir: &Inode,
-        at: &Arc<Dentry>,
-        mode: Mode,
-        dev: DevId,
-    ) -> KResult<()> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<TmpFs>().unwrap();
-
-        if vfs.readonly {
-            return Err(EROFS);
-        }
-
+    fn mknod(&self, at: &Arc<Dentry>, mode: Mode, dev: DevId) -> KResult<()> {
         if !s_ischr(mode) && !s_isblk(mode) {
             return Err(EINVAL);
         }
 
-        let ino = vfs.assign_ino();
-        let mut icache = vfs.icache.lock();
-        let file = icache.alloc(ino, Box::new(NodeOps::new(dev)));
-        file.idata.lock().mode = mode & (0o777 | S_IFBLK | S_IFCHR);
-        icache.submit(&file)?;
+        let vfs = acquire(&self.vfs)?;
+        let vfs = astmp(&vfs);
+
+        let mut rwsem = self.rwsem.lock();
 
-        self.link(dir, file.as_ref(), at.name().clone())?;
+        let ino = vfs.assign_ino();
+        let file = NodeInode::new(
+            ino,
+            self.vfs.clone(),
+            mode & (0o777 | S_IFBLK | S_IFCHR),
+            dev,
+        );
+
+        self.link(at.name().clone(), file.as_ref(), rwsem.as_mut());
         at.save_reg(file)
     }
 
-    fn symlink(
-        &self,
-        dir: &Inode,
-        at: &Arc<Dentry>,
-        target: &[u8],
-    ) -> KResult<()> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<TmpFs>().unwrap();
-
-        if vfs.readonly {
-            return Err(EROFS);
-        }
-
-        let ino = vfs.assign_ino();
-        let mut icache = vfs.icache.lock();
+    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        let vfs = acquire(&self.vfs)?;
+        let vfs = astmp(&vfs);
 
-        let target_len = target.len() as u64;
+        let mut rwsem = self.rwsem.lock();
 
-        let file =
-            icache.alloc(ino, Box::new(SymlinkOps::new(Arc::from(target))));
-        {
-            let mut idata = file.idata.lock();
-            idata.mode = S_IFLNK | 0o777;
-            idata.size = target_len;
-        }
-        icache.submit(&file)?;
+        let ino = vfs.assign_ino();
+        let file = SymlinkInode::new(ino, self.vfs.clone(), target.into());
 
-        self.link(dir, file.as_ref(), at.name().clone())?;
+        self.link(at.name().clone(), file.as_ref(), rwsem.as_mut());
         at.save_symlink(file)
     }
 
-    fn mkdir(&self, dir: &Inode, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<TmpFs>().unwrap();
+    fn mkdir(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
+        let vfs = acquire(&self.vfs)?;
+        let vfs = astmp(&vfs);
 
-        if vfs.readonly {
-            return Err(EROFS);
-        }
+        let mut rwsem = self.rwsem.lock();
 
         let ino = vfs.assign_ino();
-        let mut icache = vfs.icache.lock();
-
-        let mut newdir_ops = DirectoryOps::new();
-        let entries = newdir_ops.entries.get_mut();
-        entries.push((Arc::from(b".".as_slice()), ino));
-        entries.push((Arc::from(b"..".as_slice()), dir.ino));
-
-        let newdir = icache.alloc(ino, Box::new(newdir_ops));
-        {
-            let mut newdir_idata = newdir.idata.lock();
-            newdir_idata.mode = S_IFDIR | (mode & 0o777);
-            newdir_idata.nlink = 1;
-            newdir_idata.size = 2;
-        }
+        let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode);
 
-        icache.submit(&newdir)?;
-        dir.idata.lock().nlink += 1; // link from `newdir` to `dir`, (or parent)
-
-        self.link(dir, newdir.as_ref(), at.name().clone())?;
+        self.link(at.name().clone(), newdir.as_ref(), rwsem.as_mut());
         at.save_dir(newdir)
     }
 
-    fn unlink(&self, dir: &Inode, at: &Arc<Dentry>) -> KResult<()> {
-        let vfs = dir.vfs.upgrade().ok_or(EIO)?;
-        let vfs = vfs.as_any().downcast_ref::<TmpFs>().unwrap();
+    fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        let vfs = acquire(&self.vfs)?;
+        let vfs = astmp(&vfs);
 
-        if vfs.readonly {
-            return Err(EROFS);
-        }
+        let mut dlock = self.rwsem.lock();
 
         let file = at.get_inode()?;
+        let _flock = file.rwsem.lock();
 
-        let mut file_idata = file.idata.lock();
-
-        if file_idata.mode & S_IFDIR != 0 {
+        // Safety: `flock` has done the synchronization
+        if file.mode.load(Ordering::Relaxed) & S_IFDIR != 0 {
             return Err(EISDIR);
         }
 
-        let mut self_idata = dir.idata.lock();
-        let mut entries = self.entries.lock();
-
-        let idx = entries
-            .iter()
-            .position(|(_, ino)| *ino == file.ino)
-            .expect("file not found in directory");
+        let entries = self.entries.access_mut(dlock.as_mut());
+        entries.retain(|(_, ino)| *ino != file.ino);
+
+        assert_eq!(
+            entries.len() as u64,
+            // Safety: `dlock` has done the synchronization
+            self.size.fetch_sub(1, Ordering::Relaxed) - 1
+        );
+
+        // Safety: `flock` has done the synchronization
+        let file_nlink = file.nlink.fetch_sub(1, Ordering::Relaxed) - 1;
+
+        if file_nlink == 0 {
+            // Remove the file inode from the inode cache
+            // The last reference to the inode is held by some dentry
+            // and will be released when the dentry is released
+            //
+            // TODO: Should we use some inode cache in tmpfs?
+            //
+            // vfs.icache.lock().retain(|ino, _| *ino != file.ino);
+        }
 
-        self_idata.size -= 1;
-        file_idata.nlink -= 1;
-        entries.remove(idx);
+        // Postpone the invalidation of the dentry and inode until the
+        // last reference to the dentry is released
+        //
+        // But we can remove it from the dentry cache immediately
+        // so later lookup will fail with ENOENT
+        dcache::d_remove(at);
 
-        at.invalidate()
+        Ok(())
     }
 }
 
-struct SymlinkOps {
-    target: Arc<[u8]>,
-}
-
-impl SymlinkOps {
-    fn new(target: Arc<[u8]>) -> Self {
-        Self { target }
+define_struct_inode! {
+    struct SymlinkInode {
+        target: Arc<[u8]>,
     }
 }
 
-impl InodeOps for SymlinkOps {
-    fn as_any(&self) -> &dyn Any {
-        self
+impl SymlinkInode {
+    fn new(ino: Ino, vfs: Weak<dyn Vfs>, target: Arc<[u8]>) -> Arc<Self> {
+        Self::new_locked(ino, vfs, |inode, _| unsafe {
+            let len = target.len();
+            addr_of_mut_field!(inode, target).write(target);
+
+            addr_of_mut_field!(inode, mode).write((S_IFLNK | 0o777).into());
+            addr_of_mut_field!(inode, size).write((len as u64).into());
+        })
     }
+}
 
-    fn readlink(&self, _: &Inode, buffer: &mut dyn Buffer) -> KResult<usize> {
+impl Inode for SymlinkInode {
+    fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
         buffer
             .fill(self.target.as_ref())
             .map(|result| result.allow_partial())
     }
 }
 
-impl FileOps {
-    fn new() -> Self {
-        Self {
-            data: Mutex::new(vec![]),
-        }
+define_struct_inode! {
+    struct FileInode {
+        filedata: Locked<Vec<u8>, ()>,
     }
 }
 
-impl InodeOps for FileOps {
-    fn as_any(&self) -> &dyn Any {
-        self
+impl FileInode {
+    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
+        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
+            addr_of_mut_field!(inode, filedata).write(Locked::new(vec![], rwsem));
+
+            addr_of_mut_field!(inode, mode).write((S_IFREG | (mode & 0o777)).into());
+            addr_of_mut_field!(inode, nlink).write(1.into());
+        })
     }
+}
 
-    fn read(
-        &self,
-        _: &Inode,
-        buffer: &mut dyn Buffer,
-        offset: usize,
-    ) -> KResult<usize> {
-        let data = self.data.lock();
-        let data = data.split_at_checked(offset).ok_or(EINVAL)?.1;
+impl Inode for FileInode {
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        // TODO: We don't need that strong guarantee, find some way to avoid locks
+        let lock = self.rwsem.lock_shared();
+
+        let (_, data) = self
+            .filedata
+            .access(lock.as_ref())
+            .split_at_checked(offset)
+            .ok_or(EINVAL)?;
 
         buffer.fill(data).map(|result| result.allow_partial())
     }
 
-    fn write(
-        &self,
-        inode: &Inode,
-        buffer: &[u8],
-        offset: usize,
-    ) -> KResult<usize> {
-        let mut idata = inode.idata.lock();
-        let mut data = self.data.lock();
+    fn write(&self, buffer: &[u8], offset: usize) -> KResult<usize> {
+        // TODO: We don't need that strong guarantee, find some way to avoid locks
+        let mut lock = self.rwsem.lock();
+        let filedata = self.filedata.access_mut(lock.as_mut());
 
-        if data.len() < offset + buffer.len() {
-            data.resize(offset + buffer.len(), 0);
+        if filedata.len() < offset + buffer.len() {
+            filedata.resize(offset + buffer.len(), 0);
         }
 
-        data[offset..offset + buffer.len()].copy_from_slice(&buffer);
-        idata.size = data.len() as u64;
+        filedata[offset..offset + buffer.len()].copy_from_slice(&buffer);
+
+        // Safety: `lock` has done the synchronization
+        self.size.store(filedata.len() as u64, Ordering::Relaxed);
 
         Ok(buffer.len())
     }
 
-    fn truncate(&self, inode: &Inode, length: usize) -> KResult<()> {
-        let mut idata = inode.idata.lock();
+    fn truncate(&self, length: usize) -> KResult<()> {
+        // TODO: We don't need that strong guarantee, find some way to avoid locks
+        let mut lock = self.rwsem.lock();
+        let filedata = self.filedata.access_mut(lock.as_mut());
 
-        idata.size = length as u64;
-        self.data.lock().resize(length, 0);
+        // Safety: `lock` has done the synchronization
+        self.size.store(length as u64, Ordering::Relaxed);
+        filedata.resize(length, 0);
 
         Ok(())
     }
 }
 
-/// # Lock order
-/// `vfs` -> `icache` -> `idata` -> `*ops`.`*data`
+impl_any!(TmpFs);
 struct TmpFs {
-    icache: Mutex<InodeCache<TmpFs>>,
     next_ino: AtomicIno,
     readonly: bool,
 }
 
-impl InodeCache<TmpFs> {
-    fn alloc_file(&mut self, ino: Ino, mode: Mode) -> KResult<Arc<Inode>> {
-        let file = self.alloc(ino, Box::new(FileOps::new()));
-        file.idata.lock().mode = S_IFREG | (mode & 0o777);
+impl Vfs for TmpFs {
+    fn io_blksize(&self) -> usize {
+        4096
+    }
 
-        self.submit(&file)?;
+    fn fs_devid(&self) -> DevId {
+        2
+    }
 
-        Ok(file)
+    fn is_read_only(&self) -> bool {
+        self.readonly
     }
 }
 
 impl TmpFs {
     fn assign_ino(&self) -> Ino {
-        self.next_ino.fetch_add(1, Ordering::SeqCst)
+        self.next_ino.fetch_add(1, Ordering::AcqRel)
     }
 
-    pub fn create(readonly: bool) -> KResult<(Arc<TmpFs>, Arc<Inode>)> {
-        let tmpfs = Arc::new_cyclic(|weak| Self {
-            icache: Mutex::new(InodeCache::new(weak.clone())),
+    pub fn create(readonly: bool) -> KResult<(Arc<dyn Vfs>, Arc<dyn Inode>)> {
+        let tmpfs = Arc::new(Self {
             next_ino: AtomicIno::new(1),
             readonly,
         });
 
-        let mut dir = DirectoryOps::new();
-        let entries = dir.entries.get_mut();
-        entries.push((Arc::from(b".".as_slice()), 0));
-        entries.push((Arc::from(b"..".as_slice()), 0));
-
-        let root_dir = {
-            let mut icache = tmpfs.icache.lock();
-            let root_dir = icache.alloc(0, Box::new(dir));
-            {
-                let mut idata = root_dir.idata.lock();
-
-                idata.mode = S_IFDIR | 0o755;
-                idata.nlink = 2;
-                idata.size = 2;
-            }
-
-            icache.submit(&root_dir)?;
-
-            root_dir
-        };
+        let weak = Arc::downgrade(&tmpfs);
+        let root_dir = DirectoryInode::new(0, weak, 0o755);
 
         Ok((tmpfs, root_dir))
     }
 }
 
-impl Vfs for TmpFs {
-    fn io_blksize(&self) -> usize {
-        4096
-    }
-
-    fn fs_devid(&self) -> DevId {
-        2
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-}
-
 struct TmpFsMountCreator;
 
 impl MountCreator for TmpFsMountCreator {
@@ -385,5 +345,5 @@ impl MountCreator for TmpFsMountCreator {
 }
 
 pub fn init() {
-    register_filesystem("tmpfs", Box::new(TmpFsMountCreator)).unwrap();
+    register_filesystem("tmpfs", Arc::new(TmpFsMountCreator)).unwrap();
 }

+ 5 - 3
src/kernel/async/lock.cc

@@ -52,24 +52,26 @@ static inline void _restore_interrupt_state(lock_context_t context) {
 // TODO: mark as _per_cpu
 static inline preempt_count_t& _preempt_count() {
     static preempt_count_t _preempt_count;
-    assert(!(_preempt_count & 0x80000000));
+    assert(_preempt_count >= 0);
     return _preempt_count;
 }
 
 void preempt_disable() {
     ++_preempt_count();
+    asm volatile("" : : : "memory");
 }
 
 void preempt_enable() {
+    asm volatile("" : : : "memory");
     --_preempt_count();
 }
 
 extern "C" void r_preempt_disable() {
-    ++_preempt_count();
+    preempt_disable();
 }
 
 extern "C" void r_preempt_enable() {
-    --_preempt_count();
+    preempt_enable();
 }
 
 preempt_count_t preempt_count() {

+ 2 - 1
src/kernel/async/waitlist.cc

@@ -8,13 +8,14 @@
 using namespace kernel::async;
 
 bool wait_list::wait(mutex& lock) {
+    preempt_disable();
     this->subscribe();
 
     auto* curthd = current_thread;
     curthd->set_attr(kernel::task::thread::ISLEEP);
 
     lock.unlock();
-    bool has_signals = schedule();
+    bool has_signals = schedule_now_preempt_disabled();
     lock.lock();
 
     m_subscribers.erase(curthd);

+ 12 - 10
src/kernel/block.rs

@@ -11,7 +11,7 @@ use alloc::{
 };
 use bindings::{EEXIST, EINVAL, EIO, ENOENT};
 
-use crate::KResult;
+use lazy_static::lazy_static;
 
 use super::{
     mem::{paging::Page, phys::PhysPtr},
@@ -27,18 +27,18 @@ pub trait BlockRequestQueue: Send + Sync {
     ///
     fn max_request_pages(&self) -> u64;
 
-    fn submit(&mut self, req: BlockDeviceRequest) -> KResult<()>;
+    fn submit(&self, req: BlockDeviceRequest) -> KResult<()>;
 }
 
 struct BlockDeviceDisk {
-    queue: Arc<Mutex<dyn BlockRequestQueue>>,
+    queue: Arc<dyn BlockRequestQueue>,
 }
 
 struct BlockDevicePartition {
     disk_dev: DevId,
     offset: u64,
 
-    queue: Arc<Mutex<dyn BlockRequestQueue>>,
+    queue: Arc<dyn BlockRequestQueue>,
 }
 
 enum BlockDeviceType {
@@ -74,8 +74,10 @@ impl Ord for BlockDevice {
     }
 }
 
-static BLOCK_DEVICE_LIST: Mutex<BTreeMap<DevId, Arc<BlockDevice>>> =
-    Mutex::new(BTreeMap::new());
+lazy_static! {
+    static ref BLOCK_DEVICE_LIST: Spin<BTreeMap<DevId, Arc<BlockDevice>>> =
+        Spin::new(BTreeMap::new());
+}
 
 #[derive(Debug, Clone, Copy)]
 #[repr(C)]
@@ -100,9 +102,9 @@ impl BlockDevice {
     pub fn register_disk(
         devid: DevId,
         size: u64,
-        queue: Arc<Mutex<dyn BlockRequestQueue>>,
+        queue: Arc<dyn BlockRequestQueue>,
     ) -> KResult<Arc<Self>> {
-        let max_pages = queue.lock().max_request_pages();
+        let max_pages = queue.max_request_pages();
         let device = Arc::new(Self {
             devid,
             size,
@@ -199,10 +201,10 @@ impl BlockDevice {
         }
 
         match self.dev_type {
-            BlockDeviceType::Disk(ref disk) => disk.queue.lock().submit(req),
+            BlockDeviceType::Disk(ref disk) => disk.queue.submit(req),
             BlockDeviceType::Partition(ref part) => {
                 req.sector += part.offset;
-                part.queue.lock().submit(req)
+                part.queue.submit(req)
             }
         }
     }

+ 35 - 5
src/kernel/console.rs

@@ -1,6 +1,8 @@
 use crate::prelude::*;
 
-pub struct Console {}
+use lazy_static::lazy_static;
+
+pub struct Console;
 
 impl Write for Console {
     fn write_str(&mut self, s: &str) -> core::fmt::Result {
@@ -19,11 +21,13 @@ impl Write for Console {
 }
 
 #[doc(hidden)]
-pub fn _print(args: core::fmt::Arguments) -> core::fmt::Result {
-    CONSOLE.lock().write_fmt(args)
+pub fn _print(args: core::fmt::Arguments) {
+    dont_check!(CONSOLE.lock_irq().write_fmt(args))
 }
 
-pub static CONSOLE: spin::Mutex<Console> = spin::Mutex::new(Console {});
+lazy_static! {
+    pub static ref CONSOLE: Spin<Console> = Spin::new(Console {});
+}
 
 macro_rules! print {
     ($($arg:tt)*) => {
@@ -40,4 +44,30 @@ macro_rules! println {
     };
 }
 
-pub(crate) use {print, println};
+macro_rules! println_warn {
+    ($($arg:tt)*) => {
+        $crate::println!("[kernel: warn] {}", format_args!($($arg)*))
+    };
+}
+
+macro_rules! println_debug {
+    ($($arg:tt)*) => {
+        $crate::println!("[kernel:debug] {}", format_args!($($arg)*))
+    };
+}
+
+macro_rules! println_info {
+    ($($arg:tt)*) => {
+        $crate::println!("[kernel: info] {}", format_args!($($arg)*))
+    };
+}
+
+macro_rules! println_fatal {
+    ($($arg:tt)*) => {
+        $crate::println!("[kernel:fatal] {}", format_args!($($arg)*))
+    };
+}
+
+pub(crate) use {
+    print, println, println_debug, println_fatal, println_info, println_warn,
+};

+ 3 - 1
src/kernel/interrupt.cpp

@@ -1,3 +1,4 @@
+#include "kernel/async/lock.hpp"
 #include <list>
 #include <vector>
 
@@ -75,7 +76,8 @@ void kernel::kinit::init_interrupt() {
     // TODO: move this to timer driver
     kernel::irq::register_handler(0, []() {
         kernel::hw::timer::inc_tick();
-        schedule();
+        if (async::preempt_count() == 0)
+            schedule_now();
     });
 
     port_pic1_command = 0x11; // edge trigger mode

+ 12 - 20
src/kernel/interrupt.rs

@@ -2,43 +2,35 @@ use alloc::boxed::Box;
 use alloc::vec;
 use alloc::vec::Vec;
 
+use lazy_static::lazy_static;
+
 use crate::bindings::root::EINVAL;
+use crate::Spin;
 
-static mut IRQ_HANDLERS: spin::Mutex<[Option<Vec<Box<dyn Fn()>>>; 16]> =
-    spin::Mutex::new([const { None }; 16]);
+lazy_static! {
+    static ref IRQ_HANDLERS: Spin<[Vec<Box<dyn Fn() + Send>>; 16]> =
+        Spin::new(core::array::from_fn(|_| vec![]));
+}
 
 #[no_mangle]
 pub extern "C" fn irq_handler_rust(irqno: core::ffi::c_int) {
     assert!(irqno >= 0 && irqno < 16);
 
-    let handlers = unsafe { IRQ_HANDLERS.lock() };
+    let handlers = IRQ_HANDLERS.lock();
 
-    match handlers[irqno as usize] {
-        Some(ref handlers) => {
-            for handler in handlers {
-                handler();
-            }
-        }
-        None => {}
+    for handler in handlers[irqno as usize].iter() {
+        handler();
     }
 }
 
 pub fn register_irq_handler<F>(irqno: i32, handler: F) -> Result<(), u32>
 where
-    F: Fn() + 'static,
+    F: Fn() + Send + 'static,
 {
     if irqno < 0 || irqno >= 16 {
         return Err(EINVAL);
     }
 
-    let mut handlers = unsafe { IRQ_HANDLERS.lock() };
-
-    match handlers[irqno as usize] {
-        Some(ref mut handlers) => handlers.push(Box::new(handler)),
-        None => {
-            handlers[irqno as usize].replace(vec![Box::new(handler)]);
-        }
-    }
-
+    IRQ_HANDLERS.lock_irq()[irqno as usize].push(Box::new(handler));
     Ok(())
 }

+ 10 - 14
src/kernel/mem/mm_list.cc

@@ -16,8 +16,7 @@ static inline void __invalidate_all_tlb() {
         : "rax", "memory");
 }
 
-static inline void __dealloc_page_table_all(paging::pfn_t pt, int depth,
-                                            int from, int to) {
+static inline void __dealloc_page_table_all(paging::pfn_t pt, int depth, int from, int to) {
     using namespace paging;
 
     if (depth > 1) {
@@ -138,8 +137,7 @@ int mm_list::register_brk(uintptr_t addr) {
         return -ENOMEM;
 
     bool inserted;
-    std::tie(m_brk, inserted) =
-        m_areas.emplace(addr, MM_ANONYMOUS | MM_WRITE | MM_BREAK);
+    std::tie(m_brk, inserted) = m_areas.emplace(addr, MM_ANONYMOUS | MM_WRITE | MM_BREAK);
 
     assert(inserted);
     return 0;
@@ -186,8 +184,8 @@ mm_list::iterator mm_list::split(iterator area, uintptr_t addr) {
     auto new_end = area->end;
     area->end = addr;
 
-    auto [iter, inserted] = m_areas.emplace(addr, area->flags, new_end,
-                                            area->mapped_file, new_file_offset);
+    auto [iter, inserted] =
+        m_areas.emplace(addr, area->flags, new_end, d_get(area->mapped_file), new_file_offset);
 
     assert(inserted);
     return iter;
@@ -217,8 +215,7 @@ int mm_list::unmap(iterator area, bool should_invalidate_tlb) {
     return 0;
 }
 
-int mm_list::unmap(uintptr_t start, std::size_t length,
-                   bool should_invalidate_tlb) {
+int mm_list::unmap(uintptr_t start, std::size_t length, bool should_invalidate_tlb) {
     // standard says that addr and len MUST be
     // page-aligned or the call is invalid
     if (start & 0xfff)
@@ -279,7 +276,7 @@ int mm_list::unmap(uintptr_t start, std::size_t length,
 int mm_list::mmap(const map_args& args) {
     auto& vaddr = args.vaddr;
     auto& length = args.length;
-    auto& finode = args.file_inode;
+    auto& file = args.file;
     auto& foff = args.file_offset;
     auto& flags = args.flags;
 
@@ -298,10 +295,10 @@ int mm_list::mmap(const map_args& args) {
         attributes |= PA_NXE;
 
     if (flags & MM_MAPPED) {
-        assert(finode);
+        assert(file);
 
-        auto [area, inserted] = m_areas.emplace(
-            vaddr, flags & ~MM_INTERNAL_MASK, vaddr + length, finode, foff);
+        auto [area, inserted] =
+            m_areas.emplace(vaddr, flags & ~MM_INTERNAL_MASK, vaddr + length, d_get(file), foff);
         assert(inserted);
 
         attributes |= PA_MMAPPED_PAGE;
@@ -310,8 +307,7 @@ int mm_list::mmap(const map_args& args) {
     } else if (flags & MM_ANONYMOUS) {
         // private mapping of zero-filled pages
         // TODO: shared mapping
-        auto [area, inserted] =
-            m_areas.emplace(vaddr, (flags & ~MM_INTERNAL_MASK), vaddr + length);
+        auto [area, inserted] = m_areas.emplace(vaddr, (flags & ~MM_INTERNAL_MASK), vaddr + length);
         assert(inserted);
 
         attributes |= PA_ANONYMOUS_PAGE;

+ 8 - 4
src/kernel/mem/paging.cc

@@ -96,8 +96,7 @@ static inline page* _create_zone(pfn_t pfn, unsigned order) {
 }
 
 // call with zone_lock held
-static inline void _split_zone(page* zone, unsigned order,
-                               unsigned target_order) {
+static inline void _split_zone(page* zone, unsigned order, unsigned target_order) {
     while (order > target_order) {
         pfn_t pfn = page_to_pfn(zone);
         _create_zone(buddy(pfn, order - 1), order - 1);
@@ -358,8 +357,13 @@ void kernel::mem::paging::handle_page_fault(unsigned long err) {
         size_t offset = (vaddr & ~0xfff) - mm_area->start;
         char* data = physaddr<char>{pfn};
 
-        int n = fs_read(mm_area->mapped_file, data, 4096,
-                        mm_area->file_offset + offset, 4096);
+        int n = fs::fs_read(mm_area->mapped_file.get(), data, 4096, mm_area->file_offset + offset,
+                            4096);
+
+        if (n < 0) {
+            kill_current(SIGBUS);
+            return;
+        }
 
         // TODO: send SIGBUS if offset is greater than real size
         if (n != 4096)

+ 20 - 10
src/kernel/mem/slab.cc

@@ -4,6 +4,7 @@
 
 #include <types/list.hpp>
 
+#include <kernel/async/lock.hpp>
 #include <kernel/mem/paging.hpp>
 #include <kernel/mem/slab.hpp>
 
@@ -12,6 +13,8 @@ using namespace types::list;
 
 constexpr std::size_t SLAB_PAGE_SIZE = 0x1000; // 4K
 
+kernel::async::mutex slab_lock;
+
 std::ptrdiff_t _slab_data_start_offset(std::size_t size) {
     return (sizeof(slab_head) + size - 1) & ~(size - 1);
 }
@@ -67,6 +70,8 @@ void _slab_add_page(slab_cache* cache) {
 }
 
 void* kernel::mem::slab_alloc(slab_cache* cache) {
+    async::lock_guard_irq lock(slab_lock);
+
     slab_head* slab = cache->slabs_partial;
     if (!slab) {                 // no partial slabs, try to get an empty slab
         if (!cache->slabs_empty) // no empty slabs, create a new one
@@ -88,24 +93,29 @@ void* kernel::mem::slab_alloc(slab_cache* cache) {
 }
 
 void kernel::mem::slab_free(void* ptr) {
+    async::lock_guard_irq lock(slab_lock);
+
     slab_head* slab = (slab_head*)((uintptr_t)ptr & ~(SLAB_PAGE_SIZE - 1));
 
     *(void**)ptr = slab->free;
     slab->free = ptr;
     slab->free_count++;
 
-    if (slab->free_count == _slab_max_count(slab->obj_size)) {
-        auto* cache = slab->cache;
-        slab_head** head = nullptr;
+    auto max_count = _slab_max_count(slab->obj_size);
 
-        if (cache->slabs_full == slab) {
-            head = &cache->slabs_full;
-        } else {
-            head = &cache->slabs_partial;
-        }
+    if (max_count == 1) {
+        list_remove(&slab->cache->slabs_full, slab);
+        list_insert(&slab->cache->slabs_empty, slab);
+    }
+
+    if (slab->free_count == 1) {
+        list_remove(&slab->cache->slabs_full, slab);
+        list_insert(&slab->cache->slabs_partial, slab);
+    }
 
-        list_remove(head, slab);
-        list_insert(&cache->slabs_empty, slab);
+    if (slab->free_count == max_count) {
+        list_remove(&slab->cache->slabs_partial, slab);
+        list_insert(&slab->cache->slabs_empty, slab);
     }
 }
 

+ 32 - 6
src/kernel/process.cpp

@@ -327,9 +327,14 @@ extern "C" void asm_ctx_switch(uintptr_t* curr_sp, uintptr_t* next_sp);
 extern "C" void after_ctx_switch() {
     current_thread->kstack.load_interrupt_stack();
     current_thread->load_thread_area32();
+
+    kernel::async::preempt_enable();
 }
 
-bool _schedule() {
+// call this with preempt_count == 1
+// after this function returns, preempt_count will be 0
+static bool do_schedule() {
+    asm volatile("" : : : "memory");
     auto* next_thd = kernel::task::dispatcher::next();
 
     if (current_thread != next_thd) {
@@ -342,21 +347,41 @@ bool _schedule() {
         auto* curr_thd = current_thread;
         current_thread = next_thd;
 
+        // this implies preempt_enable()
         asm_ctx_switch(&curr_thd->kstack.sp, &next_thd->kstack.sp);
+    } else {
+        kernel::async::preempt_enable();
     }
 
     return current_thread->signals.pending_signal() == 0;
 }
 
-bool schedule() {
-    if (kernel::async::preempt_count() != 0)
-        return true;
+static inline void check_preempt_count(kernel::async::preempt_count_t n) {
+    if (kernel::async::preempt_count() != n) [[unlikely]] {
+        kmsgf(
+            "[kernel:fatal] trying to call schedule_now() with preempt count "
+            "%d, expected %d",
+            kernel::async::preempt_count(), n);
+        assert(kernel::async::preempt_count() == n);
+    }
+}
+
+bool schedule_now() {
+    check_preempt_count(0);
+    kernel::async::preempt_disable();
+    bool result = do_schedule();
+    return result;
+}
 
-    return _schedule();
+// call this with preempt_count == 1
+bool schedule_now_preempt_disabled() {
+    check_preempt_count(1);
+    return do_schedule();
 }
 
 void NORETURN schedule_noreturn(void) {
-    _schedule();
+    schedule_now();
+    kmsgf("[kernel:fatal] an schedule_noreturn() DOES return");
     freeze();
 }
 
@@ -365,6 +390,7 @@ void NORETURN freeze(void) {
         asm volatile("cli\n\thlt");
 }
 
+// TODO!!!: make sure we call this only after all cleanup work has been done
 void NORETURN kill_current(int signo) {
     procs->kill(current_process->pid, (signo + 128) << 8 | (signo & 0xff));
     schedule_noreturn();

+ 1 - 1
src/kernel/signal.cpp

@@ -42,7 +42,7 @@ static void stop_process(int signal) {
     parent.waitlist.notify_all();
 
     while (true) {
-        if (schedule())
+        if (schedule_now())
             break;
     }
 

+ 1 - 4
src/kernel/syscall.cpp

@@ -202,9 +202,6 @@ static uint32_t _syscall32_fork(interrupt_stack* data, mmx_registers* mmxregs) {
     assert(inserted);
     auto* newthd = &*iter_newthd;
 
-    kernel::async::preempt_disable();
-    kernel::task::dispatcher::enqueue(newthd);
-
     auto newthd_prev_sp = newthd->kstack.sp;
     assert(!(newthd_prev_sp & 0xf));
 
@@ -230,7 +227,7 @@ static uint32_t _syscall32_fork(interrupt_stack* data, mmx_registers* mmxregs) {
     newthd->kstack.pushq(0);              // 0 for alignment
     newthd->kstack.pushq(newthd_prev_sp); // previous sp
 
-    kernel::async::preempt_enable();
+    kernel::task::dispatcher::enqueue(newthd);
     return newproc.pid;
 }
 

+ 13 - 19
src/kernel/syscall/fileops.cc

@@ -76,12 +76,10 @@ int kernel::syscall::do_open(const char __user* path, int flags, mode_t mode) {
     mode &= ~current_process->umask;
 
     // TODO: use copy_from_user
-    return current_process->files.open(current_process->cwd, path, flags,
-                                       mode);
+    return current_process->files.open(current_process->cwd, path, flags, mode);
 }
 
-int kernel::syscall::do_symlink(const char __user* target,
-                                const char __user* linkpath) {
+int kernel::syscall::do_symlink(const char __user* target, const char __user* linkpath) {
     // TODO: use copy_from_user
     auto [dent, status] = current_open(linkpath, false);
     if (!dent)
@@ -94,8 +92,7 @@ int kernel::syscall::do_symlink(const char __user* target,
     return fs::fs_symlink(dent.get(), target);
 }
 
-int kernel::syscall::do_readlink(const char __user* pathname, char __user* buf,
-                                 size_t buf_size) {
+int kernel::syscall::do_readlink(const char __user* pathname, char __user* buf, size_t buf_size) {
     // TODO: use copy_from_user
     auto [dent, status] = current_open(pathname, false);
 
@@ -106,7 +103,7 @@ int kernel::syscall::do_readlink(const char __user* pathname, char __user* buf,
         return -EINVAL;
 
     // TODO: use copy_to_user
-    return fs_readlink(fs::r_dentry_get_inode(dent.get()), buf, buf_size);
+    return fs::fs_readlink(dent.get(), buf, buf_size);
 }
 
 int kernel::syscall::do_ioctl(int fd, unsigned long request, uintptr_t arg3) {
@@ -176,8 +173,7 @@ int kernel::syscall::do_ioctl(int fd, unsigned long request, uintptr_t arg3) {
             break;
         }
         default:
-            kmsgf("[error] the ioctl() function %x is not implemented",
-                  request);
+            kmsgf("[error] the ioctl() function %x is not implemented", request);
             return -EINVAL;
     }
 
@@ -261,8 +257,8 @@ off_t kernel::syscall::do_lseek(int fd, off_t offset, int whence) {
     return file->seek(offset, whence);
 }
 
-uintptr_t kernel::syscall::do_mmap_pgoff(uintptr_t addr, size_t len, int prot,
-                                         int flags, int fd, off_t pgoffset) {
+uintptr_t kernel::syscall::do_mmap_pgoff(uintptr_t addr, size_t len, int prot, int flags, int fd,
+                                         off_t pgoffset) {
     if (addr & 0xfff)
         return -EINVAL;
     if (len == 0)
@@ -328,8 +324,7 @@ int kernel::syscall::do_munmap(uintptr_t addr, size_t len) {
     return current_process->mms.unmap(addr, len, true);
 }
 
-ssize_t kernel::syscall::do_sendfile(int out_fd, int in_fd,
-                                     off_t __user* offset, size_t count) {
+ssize_t kernel::syscall::do_sendfile(int out_fd, int in_fd, off_t __user* offset, size_t count) {
     auto* out_file = current_process->files[out_fd];
     auto* in_file = current_process->files[in_fd];
 
@@ -368,8 +363,8 @@ ssize_t kernel::syscall::do_sendfile(int out_fd, int in_fd,
     return totn;
 }
 
-int kernel::syscall::do_statx(int dirfd, const char __user* path, int flags,
-                              unsigned int mask, statx __user* statxbuf) {
+int kernel::syscall::do_statx(int dirfd, const char __user* path, int flags, unsigned int mask,
+                              statx __user* statxbuf) {
     // AT_STATX_SYNC_AS_STAT is the default value
     if ((flags & AT_STATX_SYNC_TYPE) != AT_STATX_SYNC_AS_STAT) {
         NOT_IMPLEMENTED;
@@ -386,7 +381,7 @@ int kernel::syscall::do_statx(int dirfd, const char __user* path, int flags,
         return status;
 
     // TODO: copy to user
-    return fs_statx(fs::r_dentry_get_inode(dent.get()), statxbuf, mask);
+    return fs::fs_statx(dent.get(), statxbuf, mask);
 }
 
 int kernel::syscall::do_fcntl(int fd, int cmd, unsigned long arg) {
@@ -427,7 +422,7 @@ int kernel::syscall::do_truncate(const char __user* pathname, long length) {
     if (!dent || status)
         return status;
 
-    return fs_truncate(fs::r_dentry_get_inode(dent.get()), length);
+    return fs::fs_truncate(dent.get(), length);
 }
 
 int kernel::syscall::do_unlink(const char __user* pathname) {
@@ -457,8 +452,7 @@ int kernel::syscall::do_access(const char __user* pathname, int mode) {
     }
 }
 
-int kernel::syscall::do_mknod(const char __user* pathname, mode_t mode,
-                              dev_t dev) {
+int kernel::syscall::do_mknod(const char __user* pathname, mode_t mode, dev_t dev) {
     mode &= S_IFMT | (~current_process->umask & 0777);
     auto [dent, status] = current_open(pathname);
     if (!dent)

+ 30 - 3
src/kernel/task/thread.cc

@@ -12,7 +12,7 @@
 #include <kernel/task/readyqueue.hpp>
 #include <kernel/task/thread.hpp>
 
-constexpr std::size_t KERNEL_STACK_ORDER = 3; // 2^3 * 4096 = 32KB
+constexpr std::size_t KERNEL_STACK_ORDER = 7; // 2^7 * 4096 = 512KB
 
 using namespace kernel::task;
 using namespace kernel::mem;
@@ -91,7 +91,21 @@ void thread::kernel_stack::load_interrupt_stack() const {
     tss->rsp[0] = sp;
 }
 
-void thread::set_attr(thd_attr_t new_attr) {
+// TODO!!!: change of attribute should acquire dispatcher lock
+//          to prevent inconsistency of tasks in ready queue
+void thread::set_attr(thd_attr_t new_attr, bool forced) {
+    // TODO!!!: rewrite this with a state-machine-based method to prevent
+    // inconsistency and random transitions among states
+    if (attr & USLEEP && (new_attr != READY) && (new_attr != USLEEP)) {
+        kmsgf(
+            "[kernel:warn] trying to change thread state of %d from USLEEP to "
+            "%x, might be "
+            "doing something dumb.",
+            this->owner, new_attr);
+
+        return;
+    }
+
     switch (new_attr) {
         case SYSTEM:
             attr |= SYSTEM;
@@ -103,7 +117,14 @@ void thread::set_attr(thd_attr_t new_attr) {
                 break;
             }
 
-            if (attr & READY)
+            if (attr & READY) {
+                kmsgf("[kernel:warn] trying to wake up %d from USLEEP",
+                      this->owner);
+
+                break;
+            }
+
+            if (!forced && attr & USLEEP)
                 break;
 
             attr &= SYSTEM;
@@ -115,6 +136,12 @@ void thread::set_attr(thd_attr_t new_attr) {
             attr &= SYSTEM;
             attr |= ISLEEP;
 
+            dispatcher::dequeue(this);
+            break;
+        case USLEEP:
+            attr &= SYSTEM;
+            attr |= USLEEP;
+
             dispatcher::dequeue(this);
             break;
         case STOPPED:

+ 3 - 0
src/kernel/tty.cpp

@@ -187,6 +187,9 @@ void tty::_echo_char(int c) {
     this->show_char(c);
 }
 
+// TODO!!!: this function is racy as it accesses this->buf without
+//          acquiring this->mtx_buf or doing any synchronization
+//
 // do some ignore and remapping work
 // real commit operation is in _real_commit_char()
 void tty::commit_char(int c) {

+ 56 - 58
src/kernel/vfs.cpp

@@ -17,16 +17,15 @@
 #include <kernel/vfs.hpp>
 #include <kernel/vfs/dentry.hpp>
 
-fs::regular_file::regular_file(file_flags flags, size_t cursor,
-                               struct rust_inode_handle* ind)
-    : file(flags), cursor(cursor), ind(ind) {}
+fs::regular_file::regular_file(file_flags flags, size_t cursor, dentry_pointer dentry)
+    : file(flags), cursor(cursor), dentry(std::move(dentry)) {}
 
 ssize_t fs::regular_file::read(char* __user buf, size_t n) {
     if (!flags.read)
         return -EBADF;
 
     // TODO: copy to user function !IMPORTANT
-    ssize_t n_wrote = fs_read(ind, buf, n, cursor, n);
+    ssize_t n_wrote = fs_read(dentry.get(), buf, n, cursor, n);
     if (n_wrote >= 0)
         cursor += n_wrote;
 
@@ -35,7 +34,7 @@ ssize_t fs::regular_file::read(char* __user buf, size_t n) {
 
 ssize_t fs::regular_file::do_write(const char* __user buf, size_t n) {
     // TODO: check privilege of user ptr
-    ssize_t n_wrote = fs_write(ind, buf, cursor, n);
+    ssize_t n_wrote = fs_write(dentry.get(), buf, cursor, n);
     if (n_wrote >= 0)
         cursor += n_wrote;
 
@@ -43,7 +42,7 @@ ssize_t fs::regular_file::do_write(const char* __user buf, size_t n) {
 }
 
 off_t fs::regular_file::seek(off_t n, int whence) {
-    size_t ind_size = r_get_inode_size(ind);
+    size_t ind_size = r_dentry_get_size(dentry.get());
     size_t pos;
     switch (whence) {
         case SEEK_SET:
@@ -69,28 +68,27 @@ off_t fs::regular_file::seek(off_t n, int whence) {
 
 int fs::regular_file::getdents(char* __user buf, size_t cnt) {
     size_t orig_cnt = cnt;
-    auto callback = readdir_callback_fn(
-        [&buf, &cnt](const char* fn, size_t fnlen, ino_t ino) {
-            size_t reclen = sizeof(fs::user_dirent) + 1 + fnlen;
-            if (cnt < reclen)
-                return -EFAULT;
-
-            auto* dirp = (fs::user_dirent*)buf;
-            dirp->d_ino = ino;
-            dirp->d_reclen = reclen;
-            // TODO: show offset
-            // dirp->d_off = 0;
-            // TODO: use copy_to_user
-            memcpy(dirp->d_name, fn, fnlen);
-            buf[reclen - 2] = 0;
-            buf[reclen - 1] = 0;
-
-            buf += reclen;
-            cnt -= reclen;
-            return 0;
-        });
-
-    int nread = fs_readdir(ind, cursor, &callback);
+    auto callback = readdir_callback_fn([&buf, &cnt](const char* fn, size_t fnlen, ino_t ino) {
+        size_t reclen = sizeof(fs::user_dirent) + 1 + fnlen;
+        if (cnt < reclen)
+            return -EFAULT;
+
+        auto* dirp = (fs::user_dirent*)buf;
+        dirp->d_ino = ino;
+        dirp->d_reclen = reclen;
+        // TODO: show offset
+        // dirp->d_off = 0;
+        // TODO: use copy_to_user
+        memcpy(dirp->d_name, fn, fnlen);
+        buf[reclen - 2] = 0;
+        buf[reclen - 1] = 0;
+
+        buf += reclen;
+        cnt -= reclen;
+        return 0;
+    });
+
+    int nread = fs_readdir(dentry.get(), cursor, &callback);
 
     if (nread > 0)
         cursor += nread;
@@ -100,27 +98,26 @@ int fs::regular_file::getdents(char* __user buf, size_t cnt) {
 
 int fs::regular_file::getdents64(char* __user buf, size_t cnt) {
     size_t orig_cnt = cnt;
-    auto callback = readdir_callback_fn(
-        [&buf, &cnt](const char* fn, size_t fnlen, ino_t ino) {
-            size_t reclen = sizeof(fs::user_dirent64) + fnlen;
-            if (cnt < reclen)
-                return -EFAULT;
-
-            auto* dirp = (fs::user_dirent64*)buf;
-            dirp->d_ino = ino;
-            dirp->d_off = 114514;
-            dirp->d_reclen = reclen;
-            dirp->d_type = 0;
-            // TODO: use copy_to_user
-            memcpy(dirp->d_name, fn, fnlen);
-            buf[reclen - 1] = 0;
-
-            buf += reclen;
-            cnt -= reclen;
-            return 0;
-        });
-
-    int nread = fs_readdir(ind, cursor, &callback);
+    auto callback = readdir_callback_fn([&buf, &cnt](const char* fn, size_t fnlen, ino_t ino) {
+        size_t reclen = sizeof(fs::user_dirent64) + fnlen;
+        if (cnt < reclen)
+            return -EFAULT;
+
+        auto* dirp = (fs::user_dirent64*)buf;
+        dirp->d_ino = ino;
+        dirp->d_off = 114514;
+        dirp->d_reclen = reclen;
+        dirp->d_type = 0;
+        // TODO: use copy_to_user
+        memcpy(dirp->d_name, fn, fnlen);
+        buf[reclen - 1] = 0;
+
+        buf += reclen;
+        cnt -= reclen;
+        return 0;
+    });
+
+    int nread = fs_readdir(dentry.get(), cursor, &callback);
 
     if (nread > 0)
         cursor += nread;
@@ -288,21 +285,19 @@ int fs::pipe::read(char* buf, size_t n) {
     return orig_n - n;
 }
 
-extern "C" int call_callback(const fs::readdir_callback_fn* func,
-                             const char* filename, size_t fnlen, ino_t ino) {
+extern "C" int call_callback(const fs::readdir_callback_fn* func, const char* filename,
+                             size_t fnlen, ino_t ino) {
     return (*func)(filename, fnlen, ino);
 }
 
-extern "C" struct dentry* dentry_open(struct dentry* context_root,
-                                      struct dentry* cwd, const char* path,
-                                      size_t path_length, bool follow);
+extern "C" struct dentry* dentry_open(struct dentry* context_root, struct dentry* cwd,
+                                      const char* path, size_t path_length, bool follow);
 
 std::pair<fs::dentry_pointer, int> fs::open(const fs::fs_context& context,
-                                            const fs::dentry_pointer& cwd,
-                                            types::string_view path,
+                                            const fs::dentry_pointer& cwd, types::string_view path,
                                             bool follow_symlinks) {
-    auto result = dentry_open(context.root.get(), cwd.get(), path.data(),
-                              path.size(), follow_symlinks);
+    auto result =
+        dentry_open(context.root.get(), cwd.get(), path.data(), path.size(), follow_symlinks);
     auto result_int = reinterpret_cast<intptr_t>(result);
 
     if (result_int > -128)
@@ -323,5 +318,8 @@ void fs::dentry_deleter::operator()(struct dentry* dentry) const {
 }
 
 fs::dentry_pointer fs::d_get(const dentry_pointer& dp) {
+    if (!dp)
+        return nullptr;
+
     return dentry_pointer{r_dget(dp.get())};
 }

+ 14 - 59
src/kernel/vfs/dentry.rs

@@ -19,7 +19,7 @@ use bindings::{EINVAL, ELOOP, ENOENT, ENOTDIR};
 use super::inode::Inode;
 
 struct DentryData {
-    inode: Arc<Inode>,
+    inode: Arc<dyn Inode>,
     flags: u64,
 }
 
@@ -125,35 +125,29 @@ impl Dentry {
         Arc::as_ptr(&self.parent)
     }
 
-    fn save_data(&self, inode: Arc<Inode>, flags: u64) -> KResult<()> {
+    fn save_data(&self, inode: Arc<dyn Inode>, flags: u64) -> KResult<()> {
         let new = DentryData { inode, flags };
 
         let old = self.data.swap(Some(Arc::new(new)));
+        // Safety: old data is `None`, so it's safe to emit the `drop` call
         assert!(old.is_none());
 
         Ok(())
     }
 
-    pub fn save_reg(&self, file: Arc<Inode>) -> KResult<()> {
+    pub fn save_reg(&self, file: Arc<dyn Inode>) -> KResult<()> {
         self.save_data(file, D_REGULAR)
     }
 
-    pub fn save_symlink(&self, link: Arc<Inode>) -> KResult<()> {
+    pub fn save_symlink(&self, link: Arc<dyn Inode>) -> KResult<()> {
         self.save_data(link, D_SYMLINK)
     }
 
-    pub fn save_dir(&self, dir: Arc<Inode>) -> KResult<()> {
+    pub fn save_dir(&self, dir: Arc<dyn Inode>) -> KResult<()> {
         self.save_data(dir, D_DIRECTORY)
     }
 
-    pub fn invalidate(&self) -> KResult<()> {
-        let old = self.data.swap(None);
-        assert!(old.is_some());
-
-        Ok(())
-    }
-
-    pub fn get_inode(&self) -> KResult<Arc<Inode>> {
+    pub fn get_inode(&self) -> KResult<Arc<dyn Inode>> {
         self.data
             .load()
             .as_ref()
@@ -200,16 +194,10 @@ impl Dentry {
                 let mut buffer = [0u8; 256];
                 let mut buffer = ByteBuffer::new(&mut buffer);
 
-                data.inode.readlink(&data.inode, &mut buffer)?;
+                data.inode.readlink(&mut buffer)?;
                 let path = Path::new(buffer.data())?;
 
-                let dentry = Self::open_recursive(
-                    context,
-                    &dentry.parent,
-                    path,
-                    true,
-                    nrecur + 1,
-                )?;
+                let dentry = Self::open_recursive(context, &dentry.parent, path, true, nrecur + 1)?;
 
                 Self::resolve_directory(context, dentry, nrecur + 1)
             }
@@ -251,11 +239,7 @@ impl Dentry {
                 PathComponent::TrailingEmpty | PathComponent::Current => {} // pass
                 PathComponent::Parent => {
                     if !cwd.hash_eq(root_dentry.as_ref()) {
-                        cwd = Self::resolve_directory(
-                            context,
-                            cwd.parent.clone(),
-                            nrecur,
-                        )?;
+                        cwd = Self::resolve_directory(context, cwd.parent.clone(), nrecur)?;
                     }
                     continue;
                 }
@@ -275,16 +259,10 @@ impl Dentry {
                     let mut buffer = [0u8; 256];
                     let mut buffer = ByteBuffer::new(&mut buffer);
 
-                    data.inode.readlink(&data.inode, &mut buffer)?;
+                    data.inode.readlink(&mut buffer)?;
                     let path = Path::new(buffer.data())?;
 
-                    cwd = Self::open_recursive(
-                        context,
-                        &cwd.parent,
-                        path,
-                        true,
-                        nrecur + 1,
-                    )?;
+                    cwd = Self::open_recursive(context, &cwd.parent, path, true, nrecur + 1)?;
                 }
             }
         }
@@ -302,18 +280,11 @@ pub extern "C" fn dentry_open(
     follow: bool,
 ) -> *const Dentry {
     match (|| -> KResult<Arc<Dentry>> {
-        let path =
-            Path::new(unsafe { core::slice::from_raw_parts(path, path_len) })?;
+        let path = Path::new(unsafe { core::slice::from_raw_parts(path, path_len) })?;
 
         let context = FsContext { root: context_root };
 
-        Dentry::open_recursive(
-            &context,
-            Dentry::from_raw(&cwd).as_ref(),
-            path,
-            follow,
-            0,
-        )
+        Dentry::open_recursive(&context, Dentry::from_raw(&cwd).as_ref(), path, follow, 0)
     })() {
         Ok(dentry) => Arc::into_raw(dentry),
         Err(err) => (-(err as i32) as usize) as *const Dentry,
@@ -375,22 +346,6 @@ pub extern "C" fn r_dput(dentry: *const Dentry) {
     unsafe { Arc::from_raw(dentry) };
 }
 
-#[no_mangle]
-pub extern "C" fn r_dentry_get_inode(dentry: *const Dentry) -> *const Inode {
-    let dentry = Dentry::from_raw(&dentry);
-
-    match dentry.get_inode() {
-        Ok(inode) => Arc::into_raw(inode),
-        Err(err) => {
-            dont_check!(println!(
-                "[kernel:warn] r_dentry_get_inode: {:?}",
-                err
-            ));
-            core::ptr::null()
-        }
-    }
-}
-
 #[no_mangle]
 pub extern "C" fn r_dentry_is_directory(dentry: *const Dentry) -> bool {
     let dentry = Dentry::from_raw(&dentry);

+ 29 - 17
src/kernel/vfs/dentry/dcache.rs

@@ -1,4 +1,7 @@
-use core::{mem::MaybeUninit, sync::atomic::AtomicPtr};
+use core::{
+    mem::MaybeUninit,
+    sync::atomic::{AtomicPtr, Ordering},
+};
 
 use alloc::sync::Arc;
 use bindings::ENOENT;
@@ -15,23 +18,21 @@ use lazy_static::lazy_static;
 
 const DCACHE_HASH_BITS: u32 = 8;
 
-static DCACHE: [RCUList<Dentry>; 1 << DCACHE_HASH_BITS] =
-    [const { RCUList::new() }; 1 << DCACHE_HASH_BITS];
-
 lazy_static! {
+    static ref DCACHE: [RCUList<Dentry>; 1 << DCACHE_HASH_BITS] =
+        core::array::from_fn(|_| RCUList::new());
     static ref DROOT: Arc<Dentry> = {
         let dentry = Arc::new_uninit();
         let fake_parent = unsafe { dentry.clone().assume_init() };
 
-        unsafe { &mut *(Arc::as_ptr(&dentry) as *mut MaybeUninit<Dentry>) }
-            .write(Dentry {
-                parent: fake_parent,
-                name: b"[root]".as_slice().into(),
-                hash: 0,
-                prev: AtomicPtr::default(),
-                next: AtomicPtr::default(),
-                data: RCUPointer::empty(),
-            });
+        unsafe { &mut *(Arc::as_ptr(&dentry) as *mut MaybeUninit<Dentry>) }.write(Dentry {
+            parent: fake_parent,
+            name: b"[root]".as_slice().into(),
+            hash: 0,
+            prev: AtomicPtr::default(),
+            next: AtomicPtr::default(),
+            data: RCUPointer::empty(),
+        });
 
         unsafe { dentry.assume_init() }
     };
@@ -50,6 +51,7 @@ pub fn d_iter_for(hash: u64) -> RCUIterator<'static, Dentry> {
     d_hinted(hash).iter()
 }
 
+/// Add the dentry to the dcache
 pub fn d_add(dentry: &Arc<Dentry>) {
     d_hinted(dentry.hash).insert(dentry.clone());
 }
@@ -60,26 +62,36 @@ pub fn d_find_fast(dentry: &Arc<Dentry>) -> Option<Arc<Dentry>> {
         .map(|dentry| dentry.clone())
 }
 
+/// Call `lookup()` on the parent inode to find out whether the dentry points to a valid inode
+///
 /// Silently fail without any side effects
 pub fn d_try_revalidate(dentry: &Arc<Dentry>) {
     (|| -> KResult<()> {
         let parent = dentry.parent().get_inode()?;
-        let inode = parent.lookup(&parent, dentry)?.ok_or(ENOENT)?;
+        let inode = parent.lookup(dentry)?.ok_or(ENOENT)?;
 
         d_save(dentry, inode)
     })()
     .unwrap_or_default();
 }
 
-pub fn d_save(dentry: &Arc<Dentry>, inode: Arc<Inode>) -> KResult<()> {
-    let mode = inode.idata.lock().mode;
-    match mode {
+/// Save the inode to the dentry.
+///
+/// Dentry flags will be determined by the inode's mode.
+pub fn d_save(dentry: &Arc<Dentry>, inode: Arc<dyn Inode>) -> KResult<()> {
+    match inode.mode.load(Ordering::Acquire) {
         mode if s_isdir(mode) => dentry.save_dir(inode),
         mode if s_islnk(mode) => dentry.save_symlink(inode),
         _ => dentry.save_reg(inode),
     }
 }
 
+/// Replace the old dentry with the new one in the dcache
 pub fn d_replace(old: &Arc<Dentry>, new: Arc<Dentry>) {
     d_hinted(old.hash).replace(old, new);
 }
+
+/// Remove the dentry from the dcache so that later d_find_fast will fail
+pub fn d_remove(dentry: &Arc<Dentry>) {
+    d_hinted(dentry.hash).remove(&dentry);
+}

+ 51 - 72
src/kernel/vfs/ffi.rs

@@ -4,7 +4,10 @@ use crate::{
     prelude::*,
 };
 
-use core::ffi::{c_char, c_void};
+use core::{
+    ffi::{c_char, c_void},
+    sync::atomic::Ordering,
+};
 
 use alloc::sync::Arc;
 use bindings::{dev_t, ino_t, mode_t, statx};
@@ -51,40 +54,28 @@ pub extern "C" fn fs_mount(
     let fstype = get_str_from_cstr(fstype).unwrap();
 
     // TODO: data
-    match super::mount::do_mount(
-        &mountpoint,
-        source,
-        mountpoint_str,
-        fstype,
-        flags,
-        &[],
-    ) {
+    match super::mount::do_mount(&mountpoint, source, mountpoint_str, fstype, flags, &[]) {
         Ok(_) => 0,
         Err(e) => -(e as i32),
     }
 }
 
-fn do_read(
-    file: &Arc<Inode>,
-    buffer: &mut [u8],
-    offset: usize,
-) -> KResult<usize> {
-    let mode = { file.idata.lock().mode };
-
-    match mode {
+fn do_read(file: &Arc<dyn Inode>, buffer: &mut [u8], offset: usize) -> KResult<usize> {
+    // Safety: Changing mode alone will have no effect on the file's contents
+    match file.mode.load(Ordering::Relaxed) {
         mode if s_isdir(mode) => Err(EISDIR),
         mode if s_isreg(mode) => {
             let mut buffer = ByteBuffer::new(buffer);
-            file.read(file, &mut buffer, offset)
+            file.read(&mut buffer, offset)
         }
         mode if s_isblk(mode) => {
             let mut buffer = ByteBuffer::new(buffer);
-            let device = BlockDevice::get(file.devid(file)?)?;
+            let device = BlockDevice::get(file.devid()?)?;
 
             Ok(device.read_some(offset, &mut buffer)?.allow_partial())
         }
         mode if s_ischr(mode) => {
-            let devid = file.devid(file)?;
+            let devid = file.devid()?;
 
             let ret = unsafe {
                 fs::char_device_read(
@@ -105,23 +96,17 @@ fn do_read(
     }
 }
 
-fn do_write(file: &Arc<Inode>, buffer: &[u8], offset: usize) -> KResult<usize> {
-    let mode = file.idata.lock().mode;
-
-    match mode {
+fn do_write(file: &Arc<dyn Inode>, buffer: &[u8], offset: usize) -> KResult<usize> {
+    // Safety: Changing mode alone will have no effect on the file's contents
+    match file.mode.load(Ordering::Relaxed) {
         mode if s_isdir(mode) => Err(EISDIR),
-        mode if s_isreg(mode) => file.write(file, buffer, offset),
+        mode if s_isreg(mode) => file.write(buffer, offset),
         mode if s_isblk(mode) => Err(EINVAL), // TODO
         mode if s_ischr(mode) => {
-            let devid = file.devid(file)?;
+            let devid = file.devid()?;
 
-            let ret = unsafe {
-                fs::char_device_write(
-                    devid,
-                    buffer.as_ptr() as *const _,
-                    buffer.len(),
-                )
-            };
+            let ret =
+                unsafe { fs::char_device_write(devid, buffer.as_ptr() as *const _, buffer.len()) };
 
             if ret < 0 {
                 Err(-ret as u32)
@@ -133,19 +118,16 @@ fn do_write(file: &Arc<Inode>, buffer: &[u8], offset: usize) -> KResult<usize> {
     }
 }
 
-fn inode_from_raw<'lt>(file: &'lt mut *const Inode) -> BorrowedArc<'lt, Inode> {
-    BorrowedArc::new(file)
-}
-
 #[no_mangle]
 pub extern "C" fn fs_read(
-    mut file: *const Inode, // borrowed
+    file: *const Dentry, // borrowed
     buf: *mut u8,
     bufsize: usize,
     offset: usize,
     n: usize,
 ) -> isize {
-    let file = inode_from_raw(&mut file);
+    let file = Dentry::from_raw(&file);
+    let file = file.get_inode().unwrap();
 
     let bufsize = bufsize.min(n);
     let buffer = into_mut_slice(buf, &bufsize);
@@ -158,12 +140,13 @@ pub extern "C" fn fs_read(
 
 #[no_mangle]
 pub extern "C" fn fs_write(
-    mut file: *const Inode, // borrowed
+    file: *const Dentry, // borrowed
     buf: *const u8,
     offset: usize,
     n: usize,
 ) -> isize {
-    let file = inode_from_raw(&mut file);
+    let file = Dentry::from_raw(&file);
+    let file = file.get_inode().unwrap();
     let buffer = into_slice(buf, &n);
 
     match do_write(&file, buffer, offset) {
@@ -174,39 +157,42 @@ pub extern "C" fn fs_write(
 
 #[no_mangle]
 pub extern "C" fn fs_statx(
-    mut file: *const Inode, // borrowed
+    file: *const Dentry, // borrowed
     stat: *mut statx,
     mask: u32,
 ) -> i32 {
     map_err_ffi!((|| {
-        let file = inode_from_raw(&mut file);
+        let file = Dentry::from_raw(&file);
+        let file = file.get_inode().unwrap();
         let statx = unsafe { stat.as_mut() }.unwrap();
 
-        file.statx(file.as_ref(), statx, mask)
+        file.statx(statx, mask)
     })())
 }
 
 #[no_mangle]
 pub extern "C" fn fs_truncate(
-    mut file: *const Inode, // borrowed
+    file: *const Dentry, // borrowed
     size: usize,
 ) -> i32 {
     map_err_ffi!((|| {
-        let file = inode_from_raw(&mut file);
-        file.truncate(file.as_ref(), size)
+        let file = Dentry::from_raw(&file);
+        let file = file.get_inode().unwrap();
+        file.truncate(size)
     })())
 }
 
 #[no_mangle]
 pub extern "C" fn fs_readlink(
-    mut file: *const Inode, // borrowed
+    file: *const Dentry, // borrowed
     mut buf: *mut u8,
     bufsize: usize,
 ) -> i32 {
-    let file = inode_from_raw(&mut file);
+    let file = Dentry::from_raw(&file);
+    let file = file.get_inode().unwrap();
     let mut buffer = RawBuffer::new_from_raw(&mut buf, bufsize);
 
-    match file.readlink(file.as_ref(), &mut buffer) {
+    match file.readlink(&mut buffer) {
         Ok(n) => n as i32,
         Err(e) => -(e as i32),
     }
@@ -222,7 +208,7 @@ pub extern "C" fn fs_creat(
         let parent = at.parent();
         let inode = parent.get_inode()?;
 
-        inode.creat(inode.as_ref(), &at, mode as u32)
+        inode.creat(&at, mode as u32)
     })())
 }
 
@@ -236,7 +222,7 @@ pub extern "C" fn fs_mkdir(
         let parent = at.parent();
         let inode = parent.get_inode()?;
 
-        inode.mkdir(inode.as_ref(), &at, mode as u32)
+        inode.mkdir(&at, mode as u32)
     })())
 }
 
@@ -251,7 +237,7 @@ pub extern "C" fn fs_mknod(
         let parent = at.parent();
         let inode = parent.get_inode()?;
 
-        inode.mknod(inode.as_ref(), &at, mode as u32, dev as DevId)
+        inode.mknod(&at, mode as u32, dev as DevId)
     })())
 }
 
@@ -265,11 +251,7 @@ pub extern "C" fn fs_symlink(
         let parent = at.parent();
         let inode = parent.get_inode()?;
 
-        inode.symlink(
-            inode.as_ref(),
-            &at,
-            get_str_from_cstr(target)?.as_bytes(),
-        )
+        inode.symlink(&at, get_str_from_cstr(target)?.as_bytes())
     })())
 }
 
@@ -280,24 +262,20 @@ pub extern "C" fn fs_unlink(at: *const Dentry) -> i32 {
         let parent = at.parent();
         let inode = parent.get_inode()?;
 
-        inode.unlink(inode.as_ref(), &at)
+        inode.unlink(&at)
     })())
 }
 
 #[no_mangle]
-pub extern "C" fn r_get_inode_mode(mut inode: *const Inode) -> mode_t {
-    let inode = inode_from_raw(&mut inode);
-    let idata = inode.idata.lock();
-
-    idata.mode as _
+pub extern "C" fn r_dentry_get_mode(dentry: *const Dentry) -> mode_t {
+    let dentry = Dentry::from_raw(&dentry);
+    dentry.get_inode().unwrap().mode.load(Ordering::Relaxed) as _
 }
 
 #[no_mangle]
-pub extern "C" fn r_get_inode_size(mut inode: *const Inode) -> u64 {
-    let inode = inode_from_raw(&mut inode);
-    let idata = inode.idata.lock();
-
-    idata.size
+pub extern "C" fn r_dentry_get_size(dentry: *const Dentry) -> u64 {
+    let dentry = Dentry::from_raw(&dentry);
+    dentry.get_inode().unwrap().size.load(Ordering::Relaxed) as _
 }
 
 extern "C" {
@@ -311,13 +289,14 @@ extern "C" {
 
 #[no_mangle]
 pub extern "C" fn fs_readdir(
-    mut file: *const Inode, // borrowed
+    dentry: *const Dentry, // borrowed
     offset: usize,
     callback: *const c_void,
 ) -> i64 {
-    let inode = inode_from_raw(&mut file);
+    let dentry = Dentry::from_raw(&dentry);
+    let dir = dentry.get_inode().unwrap();
 
-    let ret = inode.readdir(inode.as_ref(), offset, &|filename, ino| {
+    let ret = dir.readdir(offset, &|filename, ino| {
         let ret = unsafe {
             call_callback(
                 callback,

+ 16 - 22
src/kernel/vfs/filearr.cc

@@ -24,13 +24,9 @@ struct fditem_comparator {
         return lhs.fd < rhs.fd;
     }
 
-    constexpr bool operator()(int fd, const fditem& rhs) const {
-        return fd < rhs.fd;
-    }
+    constexpr bool operator()(int fd, const fditem& rhs) const { return fd < rhs.fd; }
 
-    constexpr bool operator()(const fditem& lhs, int fd) const {
-        return lhs.fd < fd;
-    }
+    constexpr bool operator()(const fditem& lhs, int fd) const { return lhs.fd < fd; }
 };
 
 // ALL METHODS SHOULD BE CALLED WITH LOCK HELD
@@ -165,9 +161,10 @@ int filearray::close(int fd) {
     return 0;
 }
 
-static inline std::pair<dentry_pointer, int> _open_file(
-    const fs_context& context, const dentry_pointer& cwd,
-    types::string_view filepath, int flags, mode_t mode) {
+static inline std::pair<dentry_pointer, int> _open_file(const fs_context& context,
+                                                        const dentry_pointer& cwd,
+                                                        types::string_view filepath, int flags,
+                                                        mode_t mode) {
     auto [dent, ret] = fs::open(context, cwd, filepath);
     if (!dent)
         return {nullptr, ret};
@@ -189,8 +186,8 @@ static inline std::pair<dentry_pointer, int> _open_file(
 }
 
 // TODO: file opening permissions check
-int filearray::open(const dentry_pointer& cwd, types::string_view filepath,
-                    int flags, mode_t mode) {
+int filearray::open(const dentry_pointer& cwd, types::string_view filepath, int flags,
+                    mode_t mode) {
     lock_guard lck{pimpl->mtx};
 
     auto [dent, ret] = _open_file(*pimpl->context, cwd, filepath, flags, mode);
@@ -199,8 +196,7 @@ int filearray::open(const dentry_pointer& cwd, types::string_view filepath,
     if (ret != 0)
         return ret;
 
-    auto inode = r_dentry_get_inode(dent.get());
-    auto filemode = r_get_inode_mode(inode);
+    auto filemode = r_dentry_get_mode(dent.get());
 
     int fdflag = (flags & O_CLOEXEC) ? FD_CLOEXEC : 0;
 
@@ -221,14 +217,13 @@ int filearray::open(const dentry_pointer& cwd, types::string_view filepath,
     // truncate file
     if (flags & O_TRUNC) {
         if (fflags.write && S_ISREG(filemode)) {
-            auto ret = fs_truncate(inode, 0);
+            auto ret = fs_truncate(dent.get(), 0);
             if (ret != 0)
                 return ret;
         }
     }
 
-    return pimpl->place_new_file(
-        std::make_shared<regular_file>(fflags, 0, inode), fdflag);
+    return pimpl->place_new_file(std::make_shared<regular_file>(fflags, 0, d_get(dent)), fdflag);
 }
 
 int filearray::pipe(int (&pipefd)[2]) {
@@ -237,11 +232,11 @@ int filearray::pipe(int (&pipefd)[2]) {
     if (1) {
         std::shared_ptr<fs::pipe> ppipe{new fs::pipe};
 
-        pipefd[0] = pimpl->place_new_file(
-            std::make_shared<fifo_file>(file::file_flags{1, 0, 0}, ppipe), 0);
+        pipefd[0] =
+            pimpl->place_new_file(std::make_shared<fifo_file>(file::file_flags{1, 0, 0}, ppipe), 0);
 
-        pipefd[1] = pimpl->place_new_file(
-            std::make_shared<fifo_file>(file::file_flags{0, 1, 0}, ppipe), 0);
+        pipefd[1] =
+            pimpl->place_new_file(std::make_shared<fifo_file>(file::file_flags{0, 1, 0}, ppipe), 0);
     }
 
     return 0;
@@ -249,8 +244,7 @@ int filearray::pipe(int (&pipefd)[2]) {
 
 filearray::filearray(std::shared_ptr<impl> ptr) : pimpl{ptr} {}
 
-filearray::filearray(const fs_context* context)
-    : filearray{std::make_shared<impl>()} {
+filearray::filearray(const fs_context* context) : filearray{std::make_shared<impl>()} {
     pimpl->context = context;
 }
 

+ 192 - 191
src/kernel/vfs/inode.rs

@@ -1,236 +1,185 @@
-use core::{ops::Deref, sync::atomic::AtomicU64};
-
-use alloc::{
-    collections::btree_map::{BTreeMap, Entry},
-    sync::{Arc, Weak},
-};
+use alloc::sync::{Arc, Weak};
 use bindings::{
-    statx, EEXIST, EINVAL, EIO, EISDIR, ENOTDIR, EPERM, STATX_ATIME,
-    STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO, STATX_MODE, STATX_MTIME,
-    STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT,
+    statx, EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID,
+    STATX_INO, STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR,
+    S_IFMT,
 };
-
-use super::{
-    dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId, ReadDirCallback,
-    TimeSpec,
+use core::{
+    mem::MaybeUninit,
+    ptr::addr_of_mut,
+    sync::atomic::{AtomicU32, AtomicU64, Ordering},
 };
+
+use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId, ReadDirCallback, TimeSpec};
 use crate::{io::Buffer, prelude::*};
 
 pub type Ino = u64;
 pub type AtomicIno = AtomicU64;
+#[allow(dead_code)]
 pub type ISize = u64;
+pub type AtomicISize = AtomicU64;
+#[allow(dead_code)]
 pub type Nlink = u64;
+pub type AtomicNlink = AtomicU64;
+#[allow(dead_code)]
 pub type Uid = u32;
+pub type AtomicUid = AtomicU32;
+#[allow(dead_code)]
 pub type Gid = u32;
+pub type AtomicGid = AtomicU32;
 pub type Mode = u32;
+pub type AtomicMode = AtomicU32;
 
-#[repr(C)]
-#[derive(Default)]
 pub struct InodeData {
-    pub size: ISize,
-    pub nlink: Nlink,
+    pub ino: Ino,
+    pub size: AtomicISize,
+    pub nlink: AtomicNlink,
 
-    pub uid: Uid,
-    pub gid: Gid,
-    pub mode: Mode,
+    pub uid: AtomicUid,
+    pub gid: AtomicGid,
+    pub mode: AtomicMode,
 
-    pub atime: TimeSpec,
-    pub mtime: TimeSpec,
-    pub ctime: TimeSpec,
-}
+    pub atime: Spin<TimeSpec>,
+    pub ctime: Spin<TimeSpec>,
+    pub mtime: Spin<TimeSpec>,
 
-pub struct Inode {
-    pub ino: Ino,
-    pub vfs: Weak<dyn Vfs>,
+    pub rwsem: RwSemaphore<()>,
 
-    pub idata: Mutex<InodeData>,
-    pub ops: Box<dyn InodeOps>,
+    pub vfs: Weak<dyn Vfs>,
 }
 
-impl Deref for Inode {
-    type Target = dyn InodeOps;
-
-    fn deref(&self) -> &Self::Target {
-        self.ops.as_ref()
+impl InodeData {
+    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
+        Self {
+            ino,
+            vfs,
+            atime: Spin::new(TimeSpec::default()),
+            ctime: Spin::new(TimeSpec::default()),
+            mtime: Spin::new(TimeSpec::default()),
+            rwsem: RwSemaphore::new(()),
+            size: Default::default(),
+            nlink: Default::default(),
+            uid: Default::default(),
+            gid: Default::default(),
+            mode: Default::default(),
+        }
     }
 }
 
+#[allow(dead_code)]
+pub trait InodeInner:
+    Send + Sync + core::ops::Deref<Target = InodeData> + core::ops::DerefMut
+{
+    fn data(&self) -> &InodeData;
+    fn data_mut(&mut self) -> &mut InodeData;
+}
+
 #[allow(unused_variables)]
-pub trait InodeOps: Send + Sync {
-    fn as_any(&self) -> &dyn Any;
-
-    fn lookup(
-        &self,
-        dir: &Inode,
-        dentry: &Arc<Dentry>,
-    ) -> KResult<Option<Arc<Inode>>> {
-        if dir.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+pub trait Inode: Send + Sync + InodeInner {
+    fn is_dir(&self) -> bool {
+        self.mode.load(Ordering::SeqCst) & S_IFDIR != 0
     }
 
-    fn creat(&self, dir: &Inode, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        if dir.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+    fn lookup(&self, dentry: &Arc<Dentry>) -> KResult<Option<Arc<dyn Inode>>> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn mkdir(&self, dir: &Inode, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
-        if dir.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+    fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn mknod(
-        &self,
-        dir: &Inode,
-        at: &Arc<Dentry>,
-        mode: Mode,
-        dev: DevId,
-    ) -> KResult<()> {
-        if dir.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+    fn mkdir(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn unlink(&self, dir: &Inode, at: &Arc<Dentry>) -> KResult<()> {
-        if dir.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+    fn mknod(&self, at: &Arc<Dentry>, mode: Mode, dev: DevId) -> KResult<()> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn symlink(
-        &self,
-        dir: &Inode,
-        at: &Arc<Dentry>,
-        target: &[u8],
-    ) -> KResult<()> {
-        if dir.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+    fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn read(
-        &self,
-        inode: &Inode,
-        buffer: &mut dyn Buffer,
-        offset: usize,
-    ) -> KResult<usize> {
-        if inode.idata.lock().mode & S_IFDIR != 0 {
-            Err(EISDIR)
-        } else {
-            Err(EINVAL)
-        }
+    fn symlink(&self, at: &Arc<Dentry>, target: &[u8]) -> KResult<()> {
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn write(
-        &self,
-        inode: &Inode,
-        buffer: &[u8],
-        offset: usize,
-    ) -> KResult<usize> {
-        if inode.idata.lock().mode & S_IFDIR != 0 {
-            Err(EISDIR)
-        } else {
-            Err(EINVAL)
-        }
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
-    fn devid(&self, inode: &Inode) -> KResult<DevId> {
-        if inode.idata.lock().mode & S_IFDIR != 0 {
-            Err(EISDIR)
-        } else {
-            Err(EINVAL)
-        }
+    fn write(&self, buffer: &[u8], offset: usize) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
-    fn readlink(
-        &self,
-        inode: &Inode,
-        buffer: &mut dyn Buffer,
-    ) -> KResult<usize> {
-        Err(EINVAL)
+    fn devid(&self) -> KResult<DevId> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
-    fn truncate(&self, inode: &Inode, length: usize) -> KResult<()> {
-        if inode.idata.lock().mode & S_IFDIR != 0 {
-            Err(EISDIR)
-        } else {
-            Err(EPERM)
-        }
+    fn readlink(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
+    }
+
+    fn truncate(&self, length: usize) -> KResult<()> {
+        Err(if self.is_dir() { EISDIR } else { EPERM })
     }
 
     fn readdir<'cb, 'r: 'cb>(
         &'r self,
-        inode: &'r Inode,
         offset: usize,
         callback: &ReadDirCallback<'cb>,
     ) -> KResult<usize> {
-        if inode.idata.lock().mode & S_IFDIR == 0 {
-            Err(ENOTDIR)
-        } else {
-            Err(EPERM)
-        }
+        Err(if !self.is_dir() { ENOTDIR } else { EPERM })
     }
 
-    fn statx(&self, inode: &Inode, stat: &mut statx, mask: u32) -> KResult<()> {
-        let (fsdev, io_blksize) = {
-            let vfs = inode.vfs.upgrade().ok_or(EIO)?;
-            (vfs.fs_devid(), vfs.io_blksize())
-        };
-        let devid = self.devid(inode);
+    fn statx(&self, stat: &mut statx, mask: u32) -> KResult<()> {
+        // Safety: ffi should have checked reference
+        let vfs = self.vfs.upgrade().expect("Vfs is dropped");
 
-        let idata = inode.idata.lock();
+        let size = self.size.load(Ordering::Relaxed);
+        let mode = self.mode.load(Ordering::Relaxed);
 
         if mask & STATX_NLINK != 0 {
-            stat.stx_nlink = idata.nlink as _;
+            stat.stx_nlink = self.nlink.load(Ordering::Acquire) as _;
             stat.stx_mask |= STATX_NLINK;
         }
 
         if mask & STATX_ATIME != 0 {
-            stat.stx_atime.tv_nsec = idata.atime.nsec as _;
-            stat.stx_atime.tv_sec = idata.atime.sec as _;
+            let atime = self.atime.lock();
+            stat.stx_atime.tv_nsec = atime.nsec as _;
+            stat.stx_atime.tv_sec = atime.sec as _;
             stat.stx_mask |= STATX_ATIME;
         }
 
         if mask & STATX_MTIME != 0 {
-            stat.stx_mtime.tv_nsec = idata.mtime.nsec as _;
-            stat.stx_mtime.tv_sec = idata.mtime.sec as _;
+            let mtime = self.mtime.lock();
+            stat.stx_mtime.tv_nsec = mtime.nsec as _;
+            stat.stx_mtime.tv_sec = mtime.sec as _;
             stat.stx_mask |= STATX_MTIME;
         }
 
         if mask & STATX_CTIME != 0 {
-            stat.stx_ctime.tv_nsec = idata.ctime.nsec as _;
-            stat.stx_ctime.tv_sec = idata.ctime.sec as _;
+            let ctime = self.ctime.lock();
+            stat.stx_ctime.tv_nsec = ctime.nsec as _;
+            stat.stx_ctime.tv_sec = ctime.sec as _;
             stat.stx_mask |= STATX_CTIME;
         }
 
         if mask & STATX_SIZE != 0 {
-            stat.stx_size = idata.size as _;
+            stat.stx_size = self.size.load(Ordering::Relaxed) as _;
             stat.stx_mask |= STATX_SIZE;
         }
 
         stat.stx_mode = 0;
         if mask & STATX_MODE != 0 {
-            stat.stx_mode |= (idata.mode & !S_IFMT) as u16;
+            stat.stx_mode |= (mode & !S_IFMT) as u16;
             stat.stx_mask |= STATX_MODE;
         }
 
         if mask & STATX_TYPE != 0 {
-            stat.stx_mode |= (idata.mode & S_IFMT) as u16;
-            if s_isblk(idata.mode) || s_ischr(idata.mode) {
+            stat.stx_mode |= (mode & S_IFMT) as u16;
+            if s_isblk(mode) || s_ischr(mode) {
+                let devid = self.devid();
                 stat.stx_rdev_major = (devid? >> 8) & 0xff;
                 stat.stx_rdev_minor = devid? & 0xff;
             }
@@ -238,26 +187,27 @@ pub trait InodeOps: Send + Sync {
         }
 
         if mask & STATX_INO != 0 {
-            stat.stx_ino = inode.ino as _;
+            stat.stx_ino = self.ino as _;
             stat.stx_mask |= STATX_INO;
         }
 
         if mask & STATX_BLOCKS != 0 {
-            stat.stx_blocks = (idata.size + 512 - 1) / 512;
-            stat.stx_blksize = io_blksize as _;
+            stat.stx_blocks = (size + 512 - 1) / 512;
+            stat.stx_blksize = vfs.io_blksize() as _;
             stat.stx_mask |= STATX_BLOCKS;
         }
 
         if mask & STATX_UID != 0 {
-            stat.stx_uid = idata.uid as _;
+            stat.stx_uid = self.uid.load(Ordering::Relaxed) as _;
             stat.stx_mask |= STATX_UID;
         }
 
         if mask & STATX_GID != 0 {
-            stat.stx_gid = idata.gid as _;
+            stat.stx_gid = self.gid.load(Ordering::Relaxed) as _;
             stat.stx_mask |= STATX_GID;
         }
 
+        let fsdev = vfs.fs_devid();
         stat.stx_dev_major = (fsdev >> 8) & 0xff;
         stat.stx_dev_minor = fsdev & 0xff;
 
@@ -266,49 +216,100 @@ pub trait InodeOps: Send + Sync {
 
         Ok(())
     }
-}
 
-pub struct InodeCache<Fs: Vfs + 'static> {
-    cache: BTreeMap<Ino, Arc<Inode>>,
-    vfs: Weak<Fs>,
+    fn new_locked<F>(ino: Ino, vfs: Weak<dyn Vfs>, f: F) -> Arc<Self>
+    where
+        Self: Sized,
+        F: FnOnce(*mut Self, &()),
+    {
+        let mut uninit = Arc::<Self>::new_uninit();
+
+        let uninit_mut = Arc::get_mut(&mut uninit).unwrap();
+
+        // Safety: `idata` is owned by `uninit`
+        let idata = unsafe {
+            addr_of_mut!(*(*uninit_mut.as_mut_ptr()).data_mut())
+                .cast::<MaybeUninit<InodeData>>()
+                .as_mut()
+                .unwrap()
+        };
+
+        idata.write(InodeData::new(ino, vfs));
+
+        f(
+            uninit_mut.as_mut_ptr(),
+            // Safety: `idata` is initialized
+            &unsafe { idata.assume_init_ref() }.rwsem.lock_shared(),
+        );
+
+        // Safety: `uninit` is initialized
+        unsafe { uninit.assume_init() }
+    }
 }
 
-impl<Fs: Vfs> InodeCache<Fs> {
-    pub fn new(vfs: Weak<Fs>) -> Self {
-        Self {
-            cache: BTreeMap::new(),
-            vfs,
+// TODO: define multiple inode structs a time
+macro_rules! define_struct_inode {
+    ($v:vis struct $inode_t:ident;) => {
+        $v struct $inode_t {
+            /// Do not use this directly
+            idata: $crate::kernel::vfs::inode::InodeData,
         }
-    }
 
-    pub fn vfs(&self) -> Weak<Fs> {
-        self.vfs.clone()
-    }
+        impl core::ops::Deref for $inode_t {
+            type Target = $crate::kernel::vfs::inode::InodeData;
 
-    pub fn alloc(&self, ino: Ino, ops: Box<dyn InodeOps>) -> Arc<Inode> {
-        Arc::new(Inode {
-            ino,
-            vfs: self.vfs.clone(),
-            idata: Mutex::new(InodeData::default()),
-            ops,
-        })
-    }
+            fn deref(&self) -> &Self::Target {
+                &self.idata
+            }
+        }
 
-    pub fn submit(&mut self, inode: &Arc<Inode>) -> KResult<()> {
-        match self.cache.entry(inode.ino) {
-            Entry::Occupied(_) => Err(EEXIST),
-            Entry::Vacant(entry) => {
-                entry.insert(inode.clone());
-                Ok(())
+        impl core::ops::DerefMut for $inode_t {
+            fn deref_mut(&mut self) -> &mut Self::Target {
+                &mut self.idata
             }
         }
-    }
 
-    pub fn get(&self, ino: Ino) -> Option<Arc<Inode>> {
-        self.cache.get(&ino).cloned()
-    }
+        impl $crate::kernel::vfs::inode::InodeInner for $inode_t {
+            fn data(&self) -> &$crate::kernel::vfs::inode::InodeData {
+                &self.idata
+            }
 
-    pub fn free(&mut self, ino: Ino) {
-        self.cache.remove(&ino);
-    }
+            fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData {
+                &mut self.idata
+            }
+        }
+    };
+    ($v:vis struct $inode_t:ident { $($vis:vis $name:ident: $type:ty,)* }) => {
+        $v struct $inode_t {
+            /// Do not use this directly
+            idata: $crate::kernel::vfs::inode::InodeData,
+            $($vis $name: $type,)*
+        }
+
+        impl core::ops::Deref for $inode_t {
+            type Target = $crate::kernel::vfs::inode::InodeData;
+
+            fn deref(&self) -> &Self::Target {
+                &self.idata
+            }
+        }
+
+        impl core::ops::DerefMut for $inode_t {
+            fn deref_mut(&mut self) -> &mut Self::Target {
+                &mut self.idata
+            }
+        }
+
+        impl $crate::kernel::vfs::inode::InodeInner for $inode_t {
+            fn data(&self) -> &$crate::kernel::vfs::inode::InodeData {
+                &self.idata
+            }
+
+            fn data_mut(&mut self) -> &mut $crate::kernel::vfs::inode::InodeData {
+                &mut self.idata
+            }
+        }
+    };
 }
+
+pub(crate) use define_struct_inode;

+ 13 - 10
src/kernel/vfs/mount.rs

@@ -6,11 +6,12 @@ use alloc::{
 };
 use bindings::{EEXIST, ENODEV, ENOTDIR};
 
+use lazy_static::lazy_static;
+
 use super::{
     dentry::{dcache, Dentry},
     inode::Inode,
     vfs::Vfs,
-    Mutex,
 };
 
 pub const MS_RDONLY: u64 = 1 << 0;
@@ -31,10 +32,12 @@ const MOUNT_FLAGS: [(u64, &str); 6] = [
     (MS_LAZYTIME, ",lazytime"),
 ];
 
-static MOUNT_CREATORS: Mutex<BTreeMap<String, Box<dyn MountCreator>>> =
-    Mutex::new(BTreeMap::new());
-
-static MOUNTS: Mutex<Vec<(Arc<Dentry>, MountPointData)>> = Mutex::new(vec![]);
+lazy_static! {
+    static ref MOUNT_CREATORS: Spin<BTreeMap<String, Arc<dyn MountCreator>>> =
+        Spin::new(BTreeMap::new());
+    static ref MOUNTS: Spin<Vec<(Arc<Dentry>, MountPointData)>> =
+        Spin::new(vec![]);
+}
 
 static mut ROOTFS: Option<Arc<Dentry>> = None;
 
@@ -47,7 +50,7 @@ impl Mount {
     pub fn new(
         mp: &Dentry,
         vfs: Arc<dyn Vfs>,
-        root_inode: Arc<Inode>,
+        root_inode: Arc<dyn Inode>,
     ) -> KResult<Self> {
         let root_dentry = Dentry::create(mp.parent().clone(), mp.name());
         root_dentry.save_dir(root_inode)?;
@@ -78,7 +81,7 @@ pub trait MountCreator: Send + Sync {
 
 pub fn register_filesystem(
     fstype: &str,
-    creator: Box<dyn MountCreator>,
+    creator: Arc<dyn MountCreator>,
 ) -> KResult<()> {
     let mut creators = MOUNT_CREATORS.lock();
     match creators.entry(String::from(fstype)) {
@@ -119,11 +122,11 @@ pub fn do_mount(
         return Err(ENOTDIR);
     }
 
-    let mount = {
+    let creator = {
         let creators = { MOUNT_CREATORS.lock() };
-        let creator = creators.get(fstype).ok_or(ENODEV)?;
-        creator.create_mount(source, flags, data, mountpoint)?
+        creators.get(fstype).ok_or(ENODEV)?.clone()
     };
+    let mount = creator.create_mount(source, flags, data, mountpoint)?;
 
     let root_dentry = mount.root().clone();
 

+ 2 - 3
src/kernel/vfs/vfs.rs

@@ -2,9 +2,8 @@ use crate::prelude::*;
 
 use super::DevId;
 
-#[allow(unused_variables)]
-pub trait Vfs: Send + Sync {
+pub trait Vfs: Send + Sync + AsAny {
     fn io_blksize(&self) -> usize;
     fn fs_devid(&self) -> DevId;
-    fn as_any(&self) -> &dyn Any;
+    fn is_read_only(&self) -> bool;
 }

+ 1 - 1
src/lib.rs

@@ -24,7 +24,7 @@ use prelude::*;
 
 #[panic_handler]
 fn panic(info: &core::panic::PanicInfo) -> ! {
-    dont_check!(println!("[kernel] panic: {:?}", info));
+    println_fatal!("panicked at {:?}\n\t\t{}", info.location(), info.message());
 
     unsafe { bindings::root::freeze() };
 }

+ 12 - 11
src/net/netdev.rs

@@ -1,7 +1,8 @@
 use alloc::{collections::btree_map::BTreeMap, sync::Arc};
-use spin::Mutex;
 
-use crate::bindings::root::EFAULT;
+use crate::{bindings::root::EFAULT, prelude::*};
+
+use lazy_static::lazy_static;
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum LinkStatus {
@@ -19,7 +20,7 @@ pub enum LinkSpeed {
 
 pub type Mac = [u8; 6];
 
-pub trait Netdev {
+pub trait Netdev: Send {
     fn up(&mut self) -> Result<(), u32>;
     fn send(&mut self, data: &[u8]) -> Result<(), u32>;
     fn fire(&mut self) -> Result<(), u32>;
@@ -51,12 +52,14 @@ impl Ord for dyn Netdev {
     }
 }
 
-static mut NETDEVS_ID: Mutex<u32> = Mutex::new(0);
-static mut NETDEVS: Mutex<BTreeMap<u32, Arc<Mutex<dyn Netdev>>>> =
-    Mutex::new(BTreeMap::new());
+lazy_static! {
+    static ref NETDEVS_ID: Spin<u32> = Spin::new(0);
+    static ref NETDEVS: Spin<BTreeMap<u32, Arc<Mutex<dyn Netdev>>>> =
+        Spin::new(BTreeMap::new());
+}
 
 pub fn alloc_id() -> u32 {
-    let mut id = unsafe { NETDEVS_ID.lock() };
+    let mut id = NETDEVS_ID.lock();
     let retval = *id;
 
     *id += 1;
@@ -68,7 +71,7 @@ pub fn register_netdev(
 ) -> Result<Arc<Mutex<dyn Netdev>>, u32> {
     let devid = netdev.id();
 
-    let mut netdevs = unsafe { NETDEVS.lock() };
+    let mut netdevs = NETDEVS.lock();
 
     use alloc::collections::btree_map::Entry;
     match netdevs.entry(devid) {
@@ -82,7 +85,5 @@ pub fn register_netdev(
 }
 
 pub fn get_netdev(id: u32) -> Option<Arc<Mutex<dyn Netdev>>> {
-    let netdevs = unsafe { NETDEVS.lock() };
-
-    netdevs.get(&id).map(|netdev| netdev.clone())
+    NETDEVS.lock().get(&id).map(|netdev| netdev.clone())
 }

+ 34 - 145
src/prelude.rs

@@ -18,7 +18,12 @@ pub(crate) use dont_check;
 pub use crate::bindings::root as bindings;
 
 #[allow(unused_imports)]
-pub(crate) use crate::kernel::console::{print, println};
+pub(crate) use crate::kernel::console::{
+    print, println, println_debug, println_fatal, println_info, println_warn,
+};
+
+#[allow(unused_imports)]
+pub(crate) use crate::sync::might_sleep;
 
 #[allow(unused_imports)]
 pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec};
@@ -27,150 +32,7 @@ pub(crate) use alloc::{boxed::Box, string::String, vec, vec::Vec};
 pub(crate) use core::{any::Any, fmt::Write, marker::PhantomData, str};
 use core::{mem::ManuallyDrop, ops::Deref};
 
-pub struct Yield;
-
-extern "C" {
-    fn r_preempt_disable();
-    fn r_preempt_enable();
-}
-
-#[inline(always)]
-pub fn preempt_disable() {
-    unsafe {
-        r_preempt_disable();
-    }
-}
-
-#[inline(always)]
-pub fn preempt_enable() {
-    unsafe {
-        r_preempt_enable();
-    }
-}
-
-impl spin::RelaxStrategy for Yield {
-    fn relax() {
-        panic!("ohohoh");
-    }
-}
-
-#[derive(Debug)]
-#[repr(transparent)]
-pub struct PreemptGuard;
-
-impl PreemptGuard {
-    #[inline(always)]
-    pub fn new() -> Self {
-        preempt_disable();
-        Self
-    }
-}
-
-impl Drop for PreemptGuard {
-    #[inline(always)]
-    fn drop(&mut self) {
-        preempt_enable();
-    }
-}
-
-#[repr(transparent)]
-pub struct MutexNoPreemptionGuard<'a, T: ?Sized> {
-    data_guard: spin::mutex::MutexGuard<'a, T>,
-    preempt_guard: PreemptGuard,
-}
-
-impl<'a, T: ?Sized> MutexNoPreemptionGuard<'a, T> {
-    #[inline(always)]
-    pub fn new(
-        preempt_guard: PreemptGuard,
-        data_guard: spin::mutex::MutexGuard<'a, T>,
-    ) -> Self {
-        Self {
-            data_guard,
-            preempt_guard,
-        }
-    }
-}
-
-impl<'a, T: ?Sized> core::ops::Deref for MutexNoPreemptionGuard<'a, T> {
-    type Target = <spin::mutex::MutexGuard<'a, T> as core::ops::Deref>::Target;
-
-    #[inline(always)]
-    fn deref(&self) -> &Self::Target {
-        &*self.data_guard
-    }
-}
-
-impl<'a, T: ?Sized> core::ops::DerefMut for MutexNoPreemptionGuard<'a, T> {
-    #[inline(always)]
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut *self.data_guard
-    }
-}
-
-impl<'a, T: ?Sized> AsRef<T> for MutexNoPreemptionGuard<'a, T> {
-    #[inline(always)]
-    fn as_ref(&self) -> &T {
-        &*self.data_guard
-    }
-}
-
-impl<'a, T: ?Sized> AsMut<T> for MutexNoPreemptionGuard<'a, T> {
-    #[inline(always)]
-    fn as_mut(&mut self) -> &mut T {
-        &mut *self.data_guard
-    }
-}
-
-#[repr(transparent)]
-pub struct MutexNoPreemption<T: ?Sized> {
-    lock: spin::mutex::Mutex<T, spin::Spin>,
-}
-
-impl<T> MutexNoPreemption<T> {
-    #[inline(always)]
-    pub const fn new(value: T) -> Self {
-        Self {
-            lock: spin::mutex::Mutex::new(value),
-        }
-    }
-}
-
-#[allow(dead_code)]
-impl<T: ?Sized> MutexNoPreemption<T> {
-    #[inline(always)]
-    pub fn lock(&self) -> MutexNoPreemptionGuard<T> {
-        let preempt_guard = PreemptGuard::new();
-        let data_guard = self.lock.lock();
-
-        MutexNoPreemptionGuard::new(preempt_guard, data_guard)
-    }
-
-    #[inline(always)]
-    pub fn is_locked(&self) -> bool {
-        self.lock.is_locked()
-    }
-
-    #[inline(always)]
-    pub fn try_lock(&self) -> Option<MutexNoPreemptionGuard<T>> {
-        let preempt_guard = PreemptGuard::new();
-        let data_guard = self.lock.try_lock();
-
-        data_guard.map(|data_guard| {
-            MutexNoPreemptionGuard::new(preempt_guard, data_guard)
-        })
-    }
-
-    #[inline(always)]
-    pub fn get_mut(&mut self) -> &mut T {
-        self.lock.get_mut()
-    }
-}
-
-#[allow(dead_code)]
-pub type RwLock<T> = spin::rwlock::RwLock<T, Yield>;
-pub type RwLockReadGuard<'a, T> = spin::rwlock::RwLockReadGuard<'a, T>;
-pub type Mutex<T> = MutexNoPreemption<T>;
+pub use crate::sync::{Mutex, RwSemaphore, Semaphore, Spin};
 
 pub struct BorrowedArc<'lt, T: ?Sized> {
     arc: ManuallyDrop<Arc<T>>,
@@ -208,3 +70,30 @@ impl<'lt, T: ?Sized> AsRef<Arc<T>> for BorrowedArc<'lt, T> {
         &self.arc
     }
 }
+
+pub trait AsAny: Send + Sync {
+    fn as_any(&self) -> &dyn Any;
+    fn as_any_mut(&mut self) -> &mut dyn Any;
+}
+
+macro_rules! impl_any {
+    ($t:ty) => {
+        impl AsAny for $t {
+            fn as_any(&self) -> &dyn Any {
+                self
+            }
+
+            fn as_any_mut(&mut self) -> &mut dyn Any {
+                self
+            }
+        }
+    };
+}
+
+macro_rules! addr_of_mut_field {
+    ($pointer:expr, $field:ident) => {
+        core::ptr::addr_of_mut!((*$pointer).$field)
+    };
+}
+
+pub(crate) use {addr_of_mut_field, impl_any};

+ 31 - 31
src/rcu.rs

@@ -3,23 +3,30 @@ use core::{
     sync::atomic::{AtomicPtr, Ordering},
 };
 
-use crate::prelude::*;
+use crate::{
+    prelude::*,
+    sync::{lock::Guard, semaphore::RwSemaphoreStrategy},
+};
 
 use alloc::sync::Arc;
 
+use lazy_static::lazy_static;
+
 pub struct RCUReadGuard<'data, T: 'data> {
     value: T,
-    guard: RwLockReadGuard<'static, ()>,
+    guard: Guard<'data, (), RwSemaphoreStrategy, false>,
     _phantom: PhantomData<&'data T>,
 }
 
-static READ_GUARD: RwLock<()> = RwLock::new(());
+lazy_static! {
+    static ref GLOBAL_RCU_SEM: RwSemaphore<()> = RwSemaphore::new(());
+}
 
 impl<'data, T: 'data> RCUReadGuard<'data, T> {
     fn lock(value: T) -> Self {
         Self {
             value,
-            guard: READ_GUARD.read(),
+            guard: GLOBAL_RCU_SEM.lock_shared(),
             _phantom: PhantomData,
         }
     }
@@ -34,7 +41,7 @@ impl<'data, T: 'data> Deref for RCUReadGuard<'data, T> {
 }
 
 fn rcu_sync() {
-    READ_GUARD.write();
+    GLOBAL_RCU_SEM.lock();
 }
 
 pub trait RCUNode<MySelf> {
@@ -45,15 +52,15 @@ pub trait RCUNode<MySelf> {
 pub struct RCUList<T: RCUNode<T>> {
     head: AtomicPtr<T>,
 
-    reader_lock: RwLock<()>,
+    reader_lock: RwSemaphore<()>,
     update_lock: Mutex<()>,
 }
 
 impl<T: RCUNode<T>> RCUList<T> {
-    pub const fn new() -> Self {
+    pub fn new() -> Self {
         Self {
             head: AtomicPtr::new(core::ptr::null_mut()),
-            reader_lock: RwLock::new(()),
+            reader_lock: RwSemaphore::new(()),
             update_lock: Mutex::new(()),
         }
     }
@@ -68,17 +75,16 @@ impl<T: RCUNode<T>> RCUList<T> {
         new_node.rcu_next().store(old_head, Ordering::Release);
 
         if let Some(old_head) = unsafe { old_head.as_ref() } {
-            old_head.rcu_prev().store(
-                Arc::into_raw(new_node.clone()) as *mut _,
-                Ordering::Release,
-            );
+            old_head
+                .rcu_prev()
+                .store(Arc::into_raw(new_node.clone()) as *mut _, Ordering::Release);
         }
 
         self.head
             .store(Arc::into_raw(new_node) as *mut _, Ordering::Release);
     }
 
-    pub fn remove(&self, node: Arc<T>) {
+    pub fn remove(&self, node: &Arc<T>) {
         let _lck = self.update_lock.lock();
 
         let prev = node.rcu_prev().load(Ordering::Acquire);
@@ -91,21 +97,19 @@ impl<T: RCUNode<T>> RCUList<T> {
         }
 
         {
-            let prev_next = unsafe { prev.as_ref().map(|rcu| rcu.rcu_next()) }
-                .unwrap_or(&self.head);
+            let prev_next =
+                unsafe { prev.as_ref().map(|rcu| rcu.rcu_next()) }.unwrap_or(&self.head);
 
             let me = prev_next.swap(next, Ordering::AcqRel);
             debug_assert!(me == Arc::as_ptr(&node) as *mut _);
             unsafe { Arc::from_raw(me) };
         }
 
-        let _lck = self.reader_lock.write();
+        let _lck = self.reader_lock.lock();
         node.rcu_prev()
             .store(core::ptr::null_mut(), Ordering::Release);
         node.rcu_next()
             .store(core::ptr::null_mut(), Ordering::Release);
-
-        drop(node);
     }
 
     pub fn replace(&self, old_node: &Arc<T>, new_node: Arc<T>) {
@@ -118,29 +122,25 @@ impl<T: RCUNode<T>> RCUList<T> {
         new_node.rcu_next().store(next, Ordering::Release);
 
         {
-            let prev_next = unsafe { prev.as_ref().map(|rcu| rcu.rcu_next()) }
-                .unwrap_or(&self.head);
+            let prev_next =
+                unsafe { prev.as_ref().map(|rcu| rcu.rcu_next()) }.unwrap_or(&self.head);
 
-            let old = prev_next.swap(
-                Arc::into_raw(new_node.clone()) as *mut _,
-                Ordering::AcqRel,
-            );
+            let old = prev_next.swap(Arc::into_raw(new_node.clone()) as *mut _, Ordering::AcqRel);
 
             debug_assert!(old == Arc::as_ptr(&old_node) as *mut _);
             unsafe { Arc::from_raw(old) };
         }
 
         if let Some(next) = unsafe { next.as_ref() } {
-            let old = next.rcu_prev().swap(
-                Arc::into_raw(new_node.clone()) as *mut _,
-                Ordering::AcqRel,
-            );
+            let old = next
+                .rcu_prev()
+                .swap(Arc::into_raw(new_node.clone()) as *mut _, Ordering::AcqRel);
 
             debug_assert!(old == Arc::as_ptr(&old_node) as *mut _);
             unsafe { Arc::from_raw(old) };
         }
 
-        let _lck = self.reader_lock.write();
+        let _lck = self.reader_lock.lock();
         old_node
             .rcu_prev()
             .store(core::ptr::null_mut(), Ordering::Release);
@@ -150,7 +150,7 @@ impl<T: RCUNode<T>> RCUList<T> {
     }
 
     pub fn iter(&self) -> RCUIterator<T> {
-        let _lck = self.reader_lock.read();
+        let _lck = self.reader_lock.lock_shared();
 
         RCUIterator {
             // SAFETY: We have a read lock, so the node is still alive.
@@ -162,7 +162,7 @@ impl<T: RCUNode<T>> RCUList<T> {
 
/// Iterator over an RCU-protected list.
///
/// Holds a shared (read) guard on the list's reader lock for its whole
/// lifetime, so nodes cannot be reclaimed while iteration is in progress.
pub struct RCUIterator<'lt, T: RCUNode<T>> {
    // Raw pointer to the node to yield next; null when the list is exhausted.
    cur: *const T,
    // Shared guard on the list's reader `RwSemaphore`; held only for its effect.
    _lock: Guard<'lt, (), RwSemaphoreStrategy, false>,
}
 
 impl<'lt, T: RCUNode<T>> Iterator for RCUIterator<'lt, T> {

+ 82 - 3
src/sync.rs

@@ -1,10 +1,56 @@
-pub struct Locked<T: Sized + Sync, U: ?Sized> {
pub mod condvar;
pub mod lock;
pub mod semaphore;
pub mod spin;
pub mod strategy;

// Preemption counter manipulation implemented on the C side of the kernel.
extern "C" {
    fn r_preempt_disable();
    fn r_preempt_enable();
}

/// Disable kernel preemption on the current CPU.
///
/// Must be balanced by a later call to `preempt_enable`.
#[inline(always)]
fn preempt_disable() {
    unsafe {
        r_preempt_disable();
    }
}

/// Re-enable kernel preemption on the current CPU.
#[inline(always)]
fn preempt_enable() {
    unsafe {
        r_preempt_enable();
    }
}

// Convenience aliases binding `Lock` to a concrete locking strategy.
/// Busy-waiting lock; never sleeps, disables preemption while held.
pub type Spin<T> = lock::Lock<T, spin::SpinStrategy>;
/// Sleeping lock with exactly one holder (a semaphore with MAX = 1).
pub type Mutex<T> = lock::Lock<T, semaphore::SemaphoreStrategy<1>>;
#[allow(dead_code)]
pub type Semaphore<T> = lock::Lock<T, semaphore::SemaphoreStrategy>;
/// Sleeping reader-writer lock.
pub type RwSemaphore<T> = lock::Lock<T, semaphore::RwSemaphoreStrategy>;

// Guard aliases matching the lock aliases above.
#[allow(dead_code)]
pub type SpinGuard<'lock, T> = lock::Guard<'lock, T, spin::SpinStrategy, true>;

#[allow(dead_code)]
pub type MutexGuard<'lock, T> = lock::Guard<'lock, T, semaphore::SemaphoreStrategy<1>, true>;

#[allow(dead_code)]
pub type SemGuard<'lock, T> = lock::Guard<'lock, T, semaphore::SemaphoreStrategy, true>;

#[allow(dead_code)]
pub type RwSemReadGuard<'lock, T> = lock::Guard<'lock, T, semaphore::RwSemaphoreStrategy, false>;

#[allow(dead_code)]
pub type RwSemWriteGuard<'lock, T> = lock::Guard<'lock, T, semaphore::RwSemaphoreStrategy, true>;
+
/// A value whose access is gated by some external lock object `U`.
///
/// `Locked` does not perform any locking itself; it records the address of
/// the guard object and callers demonstrate they hold the corresponding
/// lock by presenting a reference to that same `U`.
pub struct Locked<T: Sized, U: ?Sized> {
    inner: T,
    // Identity (address) of the lock object guarding `inner`.
    guard: *const U,
}

// SAFETY: `Locked` is a plain wrapper; it can be sent/shared across threads
// exactly when the inner value can. Actual access is serialized by the
// external lock identified by `guard`.
unsafe impl<T: Sized + Send, U: ?Sized> Send for Locked<T, U> {}
unsafe impl<T: Sized + Send + Sync, U: ?Sized> Sync for Locked<T, U> {}
 
 impl<T: Sized + Sync, U: ?Sized> Locked<T, U> {
     pub fn new(value: T, from: &U) -> Self {
@@ -24,3 +70,36 @@ impl<T: Sized + Sync, U: ?Sized> Locked<T, U> {
         unsafe { &mut *(&raw const self.inner as *mut T) }
     }
 }
+
/// Assert that the current preemption count permits sleeping.
///
/// With no argument, requires `preempt_count() == 0`; with `$n`, requires
/// the count to be exactly `$n` (e.g. `might_sleep!(1)` when the caller is
/// expected to hold exactly one preemption-disable reference).
///
/// NOTE(review): the `cfg!(debug_assertions)` arm prints and freezes while
/// the `else` arm relies on `assert_eq!` (which is also active in release
/// builds) — confirm the two branches are intentionally this way around.
macro_rules! might_sleep {
    () => {
        if cfg!(debug_assertions) {
            // Freeze instead of panicking so the failing state can be
            // inspected with a debugger.
            if unsafe { $crate::bindings::root::kernel::async_::preempt_count() } != 0 {
                println_fatal!("failed assertion");
                unsafe { $crate::bindings::root::freeze() };
            }
        } else {
            assert_eq!(
                unsafe { $crate::bindings::root::kernel::async_::preempt_count() },
                0,
                "a might_sleep function called with preempt disabled"
            );
        }
    };
    ($n:expr) => {
        if cfg!(debug_assertions) {
            if unsafe { $crate::bindings::root::kernel::async_::preempt_count() } != $n {
                println_fatal!("failed assertion");
                unsafe { $crate::bindings::root::freeze() };
            }
        } else {
            assert_eq!(
                unsafe { $crate::bindings::root::kernel::async_::preempt_count() },
                $n,
                "a might_sleep function called with the preempt count not satisfying its requirement",
            );
        }
    };
}

pub(crate) use might_sleep;

+ 113 - 0
src/sync/condvar.rs

@@ -0,0 +1,113 @@
use alloc::collections::vec_deque::VecDeque;
use bindings::{
    current_thread,
    kernel::task::{thread, thread_ISLEEP, thread_READY, thread_USLEEP},
    schedule_now_preempt_disabled,
};

use crate::{prelude::*, sync::preempt_disable};

use super::{lock::Guard, strategy::LockStrategy};

/// `current` should be per CPU, so no sync is needed
// Returns a mutable reference to the `current_thread` pointer exported by
// the C side of the kernel.
fn current() -> &'static mut *mut thread {
    #[allow(static_mut_refs)]
    unsafe {
        &mut current_thread
    }
}
+
/// A condition variable for kernel threads.
///
/// Waiters enqueue their raw `thread` pointer and go to sleep;
/// `notify_one` / `notify_all` put queued threads back into READY state.
pub struct CondVar {
    waiters: Spin<VecDeque<*mut thread>>,
}

// TODO!!!: acquire dispatcher lock because modifying thread attribute
//          is racy. But we put this in the future work since that would
//          require a lot of changes in the kernel task management system.
// SAFETY: queued `*mut thread` pointers are only dereferenced while the
// scheduler keeps the corresponding threads alive (see TODO above).
unsafe impl Send for CondVar {}
unsafe impl Sync for CondVar {}
+
impl CondVar {
    /// Create a condition variable with no waiters.
    pub fn new() -> Self {
        Self {
            waiters: Spin::new(VecDeque::new()),
        }
    }

    /// Wake at most one waiting thread by marking it READY.
    pub fn notify_one(&self) {
        // TODO!!!: acquire dispatcher lock
        let mut waiters = self.waiters.lock();

        if waiters.is_empty() {
            return;
        }

        let thread = waiters
            .pop_front()
            .map(|ptr| unsafe { ptr.as_mut() }.unwrap());

        if let Some(thread) = thread {
            unsafe { thread.set_attr(thread_READY, true) };
        }
    }

    /// Wake every waiting thread and clear the wait queue.
    pub fn notify_all(&self) {
        // TODO!!!: acquire dispatcher lock
        let mut waiters = self.waiters.lock();

        if waiters.is_empty() {
            return;
        }

        for item in waiters.iter() {
            let thread = unsafe { item.as_mut() }.unwrap();
            unsafe { thread.set_attr(thread_READY, true) };
        }

        waiters.clear();
    }

    /// Atomically release `guard`, sleep until notified, then reacquire it.
    ///
    /// Callers must recheck their condition after this returns; the
    /// condition may have changed again before the lock was reacquired.
    ///
    /// # Might Sleep
    /// This function **might sleep**, so call it in a preemptible context
    ///
    /// # Return
    /// - `true`: a pending signal was received
    pub fn wait<'a, T, S: LockStrategy>(
        &self,
        guard: &mut Guard<'a, T, S>,
        interruptible: bool,
    ) -> bool {
        preempt_disable();

        // TODO!!!: acquire dispatcher lock
        let current = *current();

        // Mark ourselves asleep *before* enqueueing and unlocking so a
        // notify arriving in between still finds us in a sleep state.
        let current_mut = unsafe { current.as_mut() }.unwrap();
        unsafe {
            if interruptible {
                current_mut.set_attr(thread_ISLEEP, false);
            } else {
                current_mut.set_attr(thread_USLEEP, false);
            }
        }

        {
            let mut waiters = self.waiters.lock();
            waiters.push_back(current);
        }

        // Release the caller's lock only after we are on the wait queue.
        unsafe {
            guard.force_unlock();
        }

        // Exactly one preemption-disable reference (ours, above) may be held.
        might_sleep!(1);

        let has_signals = unsafe { schedule_now_preempt_disabled() };

        // Reacquire the caller's lock before returning to them.
        unsafe {
            guard.force_relock();
        }

        has_signals
    }
}

+ 136 - 0
src/sync/lock.rs

@@ -0,0 +1,136 @@
use core::{
    cell::UnsafeCell,
    ops::{Deref, DerefMut},
};

use super::{spin::IrqStrategy, strategy::LockStrategy};

/// A value protected by a lock whose behavior is supplied by `Strategy`
/// (spinlock, semaphore, rw-semaphore, ...).
pub struct Lock<Value: ?Sized, Strategy: LockStrategy> {
    // Per-lock strategy state, e.g. the spinlock flag or semaphore counter.
    strategy_data: Strategy::StrategyData,
    value: UnsafeCell<Value>,
}

// SAFETY: all access to `value` is serialized by the locking strategy.
unsafe impl<T: ?Sized + Send, S: LockStrategy> Send for Lock<T, S> {}
unsafe impl<T: ?Sized + Send, S: LockStrategy> Sync for Lock<T, S> {}

impl<Value, Strategy: LockStrategy> Lock<Value, Strategy> {
    /// Create a new, unlocked `Lock` wrapping `value`.
    #[inline(always)]
    pub fn new(value: Value) -> Self {
        Self {
            strategy_data: Strategy::data(),
            value: UnsafeCell::new(value),
        }
    }
}
+
+impl<Value: ?Sized, Strategy: LockStrategy> Lock<Value, Strategy> {
+    #[inline(always)]
+    pub fn lock<'lt>(&'lt self) -> Guard<'lt, Value, Strategy> {
+        Guard {
+            _phantom: core::marker::PhantomData,
+            value: &self.value,
+            strategy_data: &self.strategy_data,
+            context: unsafe { Strategy::do_lock(&self.strategy_data) },
+        }
+    }
+
+    #[inline(always)]
+    pub fn lock_irq<'lt>(&'lt self) -> Guard<'lt, Value, IrqStrategy<Strategy>> {
+        Guard {
+            _phantom: core::marker::PhantomData,
+            value: &self.value,
+            strategy_data: &self.strategy_data,
+            context: unsafe { IrqStrategy::<Strategy>::do_lock(&self.strategy_data) },
+        }
+    }
+
+    #[inline(always)]
+    pub fn lock_shared<'lt>(&'lt self) -> Guard<'lt, Value, Strategy, false> {
+        Guard {
+            _phantom: core::marker::PhantomData,
+            value: &self.value,
+            strategy_data: &self.strategy_data,
+            context: unsafe { Strategy::do_lock_shared(&self.strategy_data) },
+        }
+    }
+
+    #[inline(always)]
+    pub fn lock_shared_irq<'lt>(&'lt self) -> Guard<'lt, Value, IrqStrategy<Strategy>, false> {
+        Guard {
+            _phantom: core::marker::PhantomData,
+            value: &self.value,
+            strategy_data: &self.strategy_data,
+            context: unsafe { IrqStrategy::<Strategy>::do_lock(&self.strategy_data) },
+        }
+    }
+
+    #[inline(always)]
+    pub fn get_mut(&mut self) -> &mut Value {
+        unsafe { &mut *self.value.get() }
+    }
+}
+
/// RAII guard produced by [`Lock`]; releases the lock on drop.
///
/// `Write` is `true` for exclusive guards and `false` for shared (read)
/// guards; only exclusive guards provide mutable access.
pub struct Guard<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool = true> {
    _phantom: core::marker::PhantomData<Strategy>,
    value: &'lock UnsafeCell<Value>,
    strategy_data: &'lock Strategy::StrategyData,
    // Strategy-specific per-acquisition state (e.g. saved IRQ flags).
    context: Strategy::GuardContext,
}
+
impl<'lock, Value: ?Sized, Strategy: LockStrategy> Guard<'lock, Value, Strategy> {
    /// Temporarily release the lock without dropping the guard; used by
    /// condition variables while the holder sleeps.
    ///
    /// # Safety
    /// Use of the lock after calling this function without relocking is undefined behavior.
    #[inline(always)]
    pub unsafe fn force_unlock(&mut self) {
        Strategy::do_temporary_unlock(&self.strategy_data, &mut self.context)
    }

    /// Reacquire a lock previously released with `force_unlock`.
    ///
    /// # Safety
    /// Calling this function more than once will cause deadlocks.
    #[inline(always)]
    pub unsafe fn force_relock(&mut self) {
        Strategy::do_relock(&self.strategy_data, &mut self.context)
    }
}
+
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> Deref
+    for Guard<'lock, Value, Strategy, Write>
+{
+    type Target = Value;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.value.get() }
+    }
+}
+
+impl<'lock, Value: ?Sized, Strategy: LockStrategy> DerefMut
+    for Guard<'lock, Value, Strategy, true>
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.value.get() }
+    }
+}
+
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> AsRef<Value>
+    for Guard<'lock, Value, Strategy, Write>
+{
+    fn as_ref(&self) -> &Value {
+        unsafe { &*self.value.get() }
+    }
+}
+
+impl<'lock, Value: ?Sized, Strategy: LockStrategy> AsMut<Value>
+    for Guard<'lock, Value, Strategy, true>
+{
+    fn as_mut(&mut self) -> &mut Value {
+        unsafe { &mut *self.value.get() }
+    }
+}
+
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> Drop
+    for Guard<'lock, Value, Strategy, Write>
+{
+    fn drop(&mut self) {
+        unsafe { Strategy::do_unlock(&self.strategy_data, &mut self.context) }
+    }
+}

+ 157 - 0
src/sync/semaphore.rs

@@ -0,0 +1,157 @@
use super::{condvar::CondVar, strategy::LockStrategy, Spin};

/// A counting-semaphore lock strategy admitting at most `MAX` concurrent
/// holders. `Mutex<T>` is `SemaphoreStrategy<1>`.
pub struct SemaphoreStrategy<const MAX: usize = { core::usize::MAX }>;

impl<const MAX: usize> SemaphoreStrategy<MAX> {
    #[inline(always)]
    // True when at least one holder currently owns the semaphore.
    fn is_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {
        let counter = data.counter.lock();
        *counter > 0
    }
}

pub struct SemaphoreData {
    // Current number of holders, always in 0..=MAX.
    counter: Spin<usize>,
    // Threads blocked waiting for a slot to free up.
    cv: CondVar,
}
+
unsafe impl<const MAX: usize> LockStrategy for SemaphoreStrategy<MAX> {
    type StrategyData = SemaphoreData;
    type GuardContext = ();

    #[inline(always)]
    fn data() -> Self::StrategyData {
        SemaphoreData {
            counter: Spin::new(0),
            cv: CondVar::new(),
        }
    }

    #[inline(always)]
    /// Acquire the semaphore in write mode
    ///
    /// # Might Sleep
    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
        loop {
            let mut counter = data.counter.lock();
            assert!(*counter <= MAX);

            if *counter < MAX {
                // A slot is free: become a holder.
                *counter += 1;
                return;
            }

            // Full: sleep until a holder releases. `wait` drops the counter
            // spin lock while sleeping and reacquires it before returning,
            // then the loop rechecks the count.
            // TODO!!!: interruptible wait
            data.cv.wait(&mut counter, false);
        }
    }

    /// Release one holder's slot and wake a single sleeping waiter.
    #[inline(always)]
    unsafe fn do_unlock(data: &Self::StrategyData, _: &mut Self::GuardContext) {
        let mut counter = data.counter.lock();
        assert!(*counter <= MAX);

        match *counter {
            n if n > 0 => {
                *counter -= 1;
                data.cv.notify_one();
            }
            // Unlocking a semaphore with zero holders is a logic error.
            _ => panic!("Semaphore in inconsistent state"),
        }
    }
}
+
/// A reader-writer semaphore strategy: up to `READ_MAX` concurrent readers
/// or exactly one writer.
///
/// Counter convention: `0` = free, `> 0` = number of active readers,
/// `-1` = write-locked.
pub struct RwSemaphoreStrategy<const READ_MAX: isize = { core::isize::MAX }>;

impl<const READ_MAX: isize> RwSemaphoreStrategy<READ_MAX> {
    #[inline(always)]
    // True when at least one reader currently holds the lock.
    fn is_read_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {
        let counter = data.counter.lock();
        *counter > 0
    }

    #[inline(always)]
    // True when a writer currently holds the lock (counter is -1).
    fn is_write_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {
        let counter = data.counter.lock();
        *counter < 0
    }
}

pub struct RwSemaphoreData {
    // State counter; see the convention documented on `RwSemaphoreStrategy`.
    counter: Spin<isize>,
    // Readers blocked behind a writer.
    read_cv: CondVar,
    // Writers blocked behind readers or another writer.
    write_cv: CondVar,
}
+
unsafe impl<const READ_MAX: isize> LockStrategy for RwSemaphoreStrategy<READ_MAX> {
    type StrategyData = RwSemaphoreData;
    type GuardContext = ();

    #[inline(always)]
    fn data() -> Self::StrategyData {
        RwSemaphoreData {
            counter: Spin::new(0),
            read_cv: CondVar::new(),
            write_cv: CondVar::new(),
        }
    }

    #[inline(always)]
    /// Acquire the semaphore in write mode
    ///
    /// # Might Sleep
    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
        loop {
            let mut counter = data.counter.lock();
            assert!(*counter >= -1 && *counter <= READ_MAX);

            if *counter == 0 {
                // Free: take the write lock (counter becomes -1).
                *counter -= 1;
                return;
            }

            // Readers or another writer hold it; sleep and recheck. `wait`
            // releases the counter spin lock while sleeping.
            // TODO!!!: interruptible wait
            data.write_cv.wait(&mut counter, false);
        }
    }

    #[inline(always)]
    /// Acquire the semaphore in read mode
    ///
    /// # Might Sleep
    unsafe fn do_lock_shared(data: &Self::StrategyData) -> Self::GuardContext {
        loop {
            let mut counter = data.counter.lock();
            assert!(*counter >= -1 && *counter <= READ_MAX);

            if *counter >= 0 && *counter < READ_MAX {
                // No writer and a reader slot available: join the readers.
                *counter += 1;
                return;
            }

            // Write-locked (or reader limit reached); sleep and recheck.
            // TODO!!!: interruptible wait
            data.read_cv.wait(&mut counter, false);
        }
    }

    /// Release either the write lock or one reader's share.
    #[inline(always)]
    unsafe fn do_unlock(data: &Self::StrategyData, _: &mut Self::GuardContext) {
        let mut counter = data.counter.lock();
        assert!(*counter >= -1 && *counter <= READ_MAX);

        match *counter {
            -1 => {
                // Writer released: wake all readers plus one writer; the
                // winners recheck the counter under the spin lock.
                *counter = 0;
                data.read_cv.notify_all();
                data.write_cv.notify_one();
            }
            n if n > 0 => {
                *counter -= 1;
                // Last reader out lets a waiting writer in.
                if *counter == 0 {
                    data.write_cv.notify_one();
                }
            }
            // Unlocking while free is a logic error.
            _ => panic!("Semaphore in inconsistent state"),
        }
    }
}

+ 105 - 0
src/sync/spin.rs

@@ -0,0 +1,105 @@
use core::{
    arch::asm,
    sync::atomic::{AtomicBool, Ordering},
};

use crate::sync::preempt_disable;

use super::{preempt_enable, strategy::LockStrategy};

/// A non-sleeping test-and-set spinlock strategy; disables preemption
/// while the lock is held.
pub struct SpinStrategy;

impl SpinStrategy {
    #[inline(always)]
    // Relaxed is sufficient: this load is only a spin hint, the acquiring
    // compare-exchange provides the required ordering.
    fn is_locked(data: &<Self as LockStrategy>::StrategyData) -> bool {
        data.load(Ordering::Relaxed)
    }
}
+
+unsafe impl LockStrategy for SpinStrategy {
+    type StrategyData = AtomicBool;
+    type GuardContext = ();
+
+    #[inline(always)]
+    fn data() -> Self::StrategyData {
+        AtomicBool::new(false)
+    }
+
+    #[inline(always)]
+    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
+        use Ordering::{Acquire, Relaxed};
+        preempt_disable();
+
+        while data
+            .compare_exchange_weak(false, true, Acquire, Relaxed)
+            .is_err()
+        {
+            while Self::is_locked(data) {
+                core::hint::spin_loop();
+            }
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn do_unlock(data: &Self::StrategyData, _: &mut Self::GuardContext) {
+        data.store(false, Ordering::Release);
+        preempt_enable();
+    }
+}
+
+pub struct IrqStrategy<Strategy: LockStrategy> {
+    _phantom: core::marker::PhantomData<Strategy>,
+}
+
+unsafe impl<Strategy: LockStrategy> LockStrategy for IrqStrategy<Strategy> {
+    type StrategyData = Strategy::StrategyData;
+    type GuardContext = (Strategy::GuardContext, usize);
+
+    #[inline(always)]
+    fn data() -> Self::StrategyData {
+        Strategy::data()
+    }
+
+    #[inline(always)]
+    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
+        let mut context: usize;
+        asm!(
+            "pushf",
+            "pop {context}",
+            "cli",
+            context = out(reg) context,
+        );
+
+        (Strategy::do_lock(data), context)
+    }
+
+    #[inline(always)]
+    unsafe fn do_unlock(
+        data: &Self::StrategyData,
+        context: &mut Self::GuardContext,
+    ) {
+        Strategy::do_unlock(data, &mut context.0);
+
+        asm!(
+            "push {context}",
+            "popf",
+            context = in(reg) context.1,
+        )
+    }
+
+    #[inline(always)]
+    unsafe fn do_temporary_unlock(
+        data: &Self::StrategyData,
+        context: &mut Self::GuardContext,
+    ) {
+        Strategy::do_unlock(data, &mut context.0)
+    }
+
+    #[inline(always)]
+    unsafe fn do_relock(
+        data: &Self::StrategyData,
+        context: &mut Self::GuardContext,
+    ) {
+        Strategy::do_relock(data, &mut context.0);
+    }
+}

+ 33 - 0
src/sync/strategy.rs

@@ -0,0 +1,33 @@
/// A pluggable locking discipline used by `Lock` / `Guard`.
///
/// # Safety
/// Implementors must guarantee that `do_lock` / `do_lock_shared` establish
/// exclusive (resp. shared) access until the matching `do_unlock`, and that
/// `do_temporary_unlock` / `do_relock` form a balanced pair.
pub unsafe trait LockStrategy {
    /// Per-lock state (atomic flag, counters, wait queues, ...).
    type StrategyData;
    /// Per-acquisition state carried in the guard (e.g. saved IRQ flags).
    type GuardContext;

    /// Create the initial, unlocked strategy state.
    fn data() -> Self::StrategyData;

    /// Acquire exclusively.
    unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext;

    /// Release an acquisition made by `do_lock` or `do_lock_shared`.
    unsafe fn do_unlock(
        data: &Self::StrategyData,
        context: &mut Self::GuardContext,
    );

    /// Acquire in shared (read) mode; defaults to an exclusive acquire for
    /// strategies with no reader/writer distinction.
    unsafe fn do_lock_shared(data: &Self::StrategyData) -> Self::GuardContext {
        Self::do_lock(data)
    }

    /// Release without consuming the guard (see `Guard::force_unlock`).
    #[inline(always)]
    unsafe fn do_temporary_unlock(
        data: &Self::StrategyData,
        context: &mut Self::GuardContext,
    ) {
        Self::do_unlock(data, context);
    }

    /// Reacquire after `do_temporary_unlock` (see `Guard::force_relock`).
    #[inline(always)]
    unsafe fn do_relock(
        data: &Self::StrategyData,
        context: &mut Self::GuardContext,
    ) {
        *context = Self::do_lock(data);
    }
}

+ 7 - 13
src/types/elf.cpp

@@ -12,6 +12,7 @@
 #include <kernel/mem/vm_area.hpp>
 #include <kernel/process.hpp>
 #include <kernel/vfs.hpp>
+#include <kernel/vfs/dentry.hpp>
 
 static inline void __user_push32(uintptr_t* sp, uint32_t d) {
     // TODO: use copy_to_user
@@ -32,33 +33,27 @@ int types::elf::elf32_load(types::elf::elf32_load_data& d) {
     if (!exec)
         return -ENOENT;
 
-    auto* inode = fs::r_dentry_get_inode(exec.get());
-
     types::elf::elf32_header hdr{};
-    auto n_read =
-        fs::fs_read(inode, (char*)&hdr, sizeof(types::elf::elf32_header), 0,
-                 sizeof(types::elf::elf32_header));
+    auto n_read = fs::fs_read(exec.get(), (char*)&hdr, sizeof(types::elf::elf32_header), 0,
+                              sizeof(types::elf::elf32_header));
 
     if (n_read != sizeof(types::elf::elf32_header))
         return -EINVAL;
 
-    if (hdr.magic[0] != 0x7f || hdr.magic[1] != 'E' || hdr.magic[2] != 'L' ||
-        hdr.magic[3] != 'F')
+    if (hdr.magic[0] != 0x7f || hdr.magic[1] != 'E' || hdr.magic[2] != 'L' || hdr.magic[3] != 'F')
         return -EINVAL;
 
     size_t phents_size = hdr.phentsize * hdr.phnum;
     size_t shents_size = hdr.shentsize * hdr.shnum;
     std::vector<types::elf::elf32_program_header_entry> phents(hdr.phnum);
-    n_read = fs_read(inode, (char*)phents.data(), phents_size, hdr.phoff,
-                      phents_size);
+    n_read = fs::fs_read(exec.get(), (char*)phents.data(), phents_size, hdr.phoff, phents_size);
 
     // broken file or I/O error
     if (n_read != phents_size)
         return -EINVAL;
 
     std::vector<types::elf::elf32_section_header_entry> shents(hdr.shnum);
-    n_read = fs_read(inode, (char*)shents.data(), shents_size, hdr.shoff,
-                      shents_size);
+    n_read = fs::fs_read(exec.get(), (char*)shents.data(), shents_size, hdr.shoff, shents_size);
 
     // broken file or I/O error
     if (n_read != shents_size)
@@ -86,8 +81,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data& d) {
 
             args.vaddr = vaddr;
             args.length = flen;
-            // TODO!!!!!!!: get ownership
-            args.file_inode = inode;
+            args.file = fs::d_get(exec);
             args.file_offset = fileoff;
 
             args.flags = MM_MAPPED;