Browse Source

move interrupt and vm to rust

greatbridf 4 months ago
parent
commit
fca6223938
64 changed files with 1271 additions and 1888 deletions
  1. 10 19
      CMakeLists.txt
  2. 4 0
      Cargo.toml
  3. 2 0
      arch/src/lib.rs
  4. 14 0
      arch/x86_64/src/interrupt.rs
  5. 14 1
      arch/x86_64/src/lib.rs
  6. 24 27
      arch/x86_64/src/task.rs
  7. 0 1
      build.rs
  8. 0 16
      include/kernel/async/lock.hpp
  9. 0 6
      include/kernel/hw/pci.hpp
  10. 0 5
      include/kernel/interrupt.hpp
  11. 0 11
      include/kernel/irq.hpp
  12. 0 112
      include/kernel/mem/mm_list.hpp
  13. 0 2
      include/kernel/mem/paging_asm.h
  14. 0 60
      include/kernel/mem/vm_area.hpp
  15. 0 13
      include/kernel/process.hpp
  16. 0 60
      include/kernel/vfs.hpp
  17. 0 26
      include/kernel/vfs/dentry.hpp
  18. 0 18
      include/kernel/vfs/vfsfwd.hpp
  19. 0 295
      include/types/elf.hpp
  20. 1 1
      src/asm/interrupt.s
  21. 2 1
      src/boot.s
  22. 4 4
      src/driver.rs
  23. 2 0
      src/driver/timer.rs
  24. 370 0
      src/elf.rs
  25. 1 1
      src/fs/procfs.rs
  26. 8 0
      src/io.rs
  27. 33 28
      src/kernel.ld
  28. 1 1
      src/kernel.rs
  29. 7 29
      src/kernel/async/lock.cc
  30. 3 0
      src/kernel/console.rs
  31. 2 0
      src/kernel/constants.rs
  32. 2 1
      src/kernel/hw/pci.cc
  33. 0 142
      src/kernel/interrupt.cpp
  34. 118 6
      src/kernel/interrupt.rs
  35. 2 2
      src/kernel/mem.rs
  36. 2 2
      src/kernel/mem/mm_area.rs
  37. 45 17
      src/kernel/mem/mm_list.rs
  38. 202 0
      src/kernel/mem/mm_list/page_fault.rs
  39. 53 39
      src/kernel/mem/page_table.rs
  40. 0 146
      src/kernel/mem/paging.cc
  41. 36 12
      src/kernel/mem/paging.rs
  42. 1 90
      src/kernel/process.cpp
  43. 2 11
      src/kernel/syscall.rs
  44. 15 4
      src/kernel/syscall/mm.rs
  45. 23 30
      src/kernel/syscall/procops.rs
  46. 2 2
      src/kernel/task.rs
  47. 1 1
      src/kernel/task/kstack.rs
  48. 15 1
      src/kernel/task/scheduler.rs
  49. 7 8
      src/kernel/task/signal.rs
  50. 46 55
      src/kernel/task/thread.rs
  51. 1 65
      src/kernel/terminal.rs
  52. 5 2
      src/kernel/timer.rs
  53. 0 76
      src/kernel/vfs.cpp
  54. 7 80
      src/kernel/vfs/dentry.rs
  55. 0 22
      src/kernel/vfs/ffi.rs
  56. 0 30
      src/kernel/vfs/filearray.rs
  57. 0 1
      src/kernel/vfs/mod.rs
  58. 10 28
      src/kinit.cpp
  59. 129 57
      src/lib.rs
  60. 34 28
      src/sync.rs
  61. 2 2
      src/sync/condvar.rs
  62. 7 7
      src/sync/lock.rs
  63. 0 180
      src/types/elf.cpp
  64. 2 4
      src/types/libstdcpp.cpp

+ 10 - 19
CMakeLists.txt

@@ -41,31 +41,23 @@ set(BOOTLOADER_SOURCES src/boot.s
 set(KERNEL_MAIN_SOURCES src/kinit.cpp
                         src/kernel/async/lock.cc
                         src/kernel/allocator.cc
-                        src/kernel/interrupt.cpp
                         src/kernel/process.cpp
                         src/kernel/mem/paging.cc
                         src/kernel/mem/slab.cc
-                        src/kernel/vfs.cpp
                         src/kernel/vga.cpp
                         src/kernel/hw/acpi.cc
                         src/kernel/hw/pci.cc
                         src/net/ethernet.cc
                         src/types/crc.cc
-                        src/types/elf.cpp
                         src/types/libstdcpp.cpp
                         include/defs.hpp
                         include/kernel/async/lock.hpp
                         include/kernel/interrupt.hpp
-                        include/kernel/irq.hpp
                         include/kernel/process.hpp
-                        include/kernel/mem/mm_list.hpp
                         include/kernel/mem/paging.hpp
                         include/kernel/mem/slab.hpp
                         include/kernel/mem/types.hpp
-                        include/kernel/mem/vm_area.hpp
                         include/kernel/utsname.hpp
-                        include/kernel/vfs.hpp
-                        include/kernel/vfs/dentry.hpp
                         include/kernel/vga.hpp
                         include/kernel/task/forward.hpp
                         include/kernel/hw/acpi.hpp
@@ -77,7 +69,6 @@ set(KERNEL_MAIN_SOURCES src/kinit.cpp
                         include/net/netdev.hpp
                         include/types/bitmap.hpp
                         include/types/buffer.hpp
-                        include/types/elf.hpp
                         include/types/list.hpp
                         include/types/types.h
                         include/types/allocator.hpp
@@ -112,16 +103,16 @@ add_custom_target(boot.img
     DEPENDS user_space_programs
     COMMAND dd if=mbr_hole.bin of=boot.img
     COMMAND dd if=/dev/zero of=boot.img bs=`expr 512 \\* 1024 \\* 1024` count=0 seek=1
-    COMMAND sh -c \"echo n\; echo\; echo\; echo\; echo\; echo a\; echo w\" | ${FDISK_BIN} boot.img
-    COMMAND mkfs.fat --offset=2048 -v -n SYSTEM boot.img
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/hello-world.out ::hello
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/interrupt-test.out ::int
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/stack-test.out ::stack
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/init.out ::init
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/priv-test.out ::priv
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_SOURCE_DIR}/busybox-minimal ::busybox_
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_SOURCE_DIR}/busybox ::busybox
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_SOURCE_DIR}/init_script.sh ::initsh
+    COMMAND sh -c \"echo n\; echo\; echo \; echo 8192\; echo\; echo a\; echo w\" | ${FDISK_BIN} boot.img
+    COMMAND mkfs.fat --offset=8192 -v -n SYSTEM boot.img
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/hello-world.out ::hello
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/interrupt-test.out ::int
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/stack-test.out ::stack
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/init.out ::init
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/priv-test.out ::priv
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/busybox-minimal ::busybox_
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/busybox ::busybox
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/init_script.sh ::initsh
 )
 
 add_custom_command(OUTPUT run

+ 4 - 0
Cargo.toml

@@ -19,6 +19,10 @@ bindgen = "0.70.1"
 [profile.dev]
 panic = "abort"
 
+[profile.dev.package.core]
+opt-level = 2
+debug = true
+
 [profile.dev.package."*"]
 opt-level = 2
 debug = false

+ 2 - 0
arch/src/lib.rs

@@ -90,3 +90,5 @@ pub mod io {
         x86_64::io::outl(port, data)
     }
 }
+
+pub use x86_64;

+ 14 - 0
arch/x86_64/src/interrupt.rs

@@ -11,3 +11,17 @@ pub fn disable() {
         asm!("cli");
     }
 }
+
+pub fn lidt(base: usize, limit: u16) {
+    let mut idt_descriptor = [0u16; 5];
+
+    idt_descriptor[0] = limit;
+    idt_descriptor[1] = base as u16;
+    idt_descriptor[2] = (base >> 16) as u16;
+    idt_descriptor[3] = (base >> 32) as u16;
+    idt_descriptor[4] = (base >> 48) as u16;
+
+    unsafe {
+        asm!("lidt ({})", in(reg) &idt_descriptor, options(att_syntax));
+    }
+}

+ 14 - 1
arch/x86_64/src/lib.rs

@@ -43,12 +43,25 @@ pub mod vm {
     pub fn set_cr3(pfn: usize) {
         unsafe {
             asm!(
-                "mov %cr3, {0}",
+                "mov {0}, %cr3",
                 in(reg) pfn,
                 options(att_syntax)
             );
         }
     }
+
+    #[inline(always)]
+    pub fn get_cr2() -> usize {
+        let cr2: usize;
+        unsafe {
+            asm!(
+                "mov %cr2, {}",
+                out(reg) cr2,
+                options(att_syntax)
+            );
+        }
+        cr2
+    }
 }
 
 pub mod interrupt;

+ 24 - 27
arch/x86_64/src/task.rs

@@ -1,5 +1,7 @@
 use core::arch::{asm, global_asm};
 
+use crate::interrupt;
+
 #[inline(always)]
 pub fn halt() {
     unsafe {
@@ -17,10 +19,7 @@ pub fn pause() {
 #[inline(always)]
 pub fn freeze() -> ! {
     loop {
-        unsafe {
-            asm!("cli", options(att_syntax, nostack));
-        }
-
+        interrupt::disable();
         halt();
     }
 }
@@ -28,13 +27,13 @@ pub fn freeze() -> ! {
 global_asm!(
     r"
     .macro movcfi reg, offset
-    	mov \reg, \offset(%rsp)
-    	.cfi_rel_offset \reg, \offset
+        mov \reg, \offset(%rsp)
+        .cfi_rel_offset \reg, \offset
     .endm
 
     .macro movrst reg, offset
-    	mov \offset(%rsp), \reg
-    	.cfi_restore \reg
+        mov \offset(%rsp), \reg
+        .cfi_restore \reg
     .endm
 
     .globl __context_switch_light
@@ -45,40 +44,38 @@ global_asm!(
         pushf
     .cfi_def_cfa_offset 0x10
 
-	    sub $0x38, %rsp  # extra 8 bytes to align to 16 bytes
+        sub $0x38, %rsp  # extra 8 bytes to align to 16 bytes
     .cfi_def_cfa_offset 0x48
 
-	    movcfi %rbx, 0x08
-	    movcfi %rbp, 0x10
-	    movcfi %r12, 0x18
-	    movcfi %r13, 0x20
-	    movcfi %r14, 0x28
-	    movcfi %r15, 0x30
+        movcfi %rbx, 0x08
+        movcfi %rbp, 0x10
+        movcfi %r12, 0x18
+        movcfi %r13, 0x20
+        movcfi %r14, 0x28
+        movcfi %r15, 0x30
 
-        push (%rdi) 	 # save sp of previous stack frame of current
-	                     # acts as saving bp
+        push (%rdi)      # save sp of previous stack frame of current
+                         # acts as saving bp
     .cfi_def_cfa_offset 0x50
 
         mov %rsp, (%rdi) # save sp of current stack
         mov (%rsi), %rsp # load sp of target stack
 
         pop (%rsi)       # load sp of previous stack frame of target
-	                     # acts as restoring previous bp
+                         # acts as restoring previous bp
     .cfi_def_cfa_offset 0x48
 
-	    pop %rax         # align to 16 bytes
+        pop %rax         # align to 16 bytes
     .cfi_def_cfa_offset 0x40
 
-	    call after_ctx_switch
-
-	    mov 0x28(%rsp), %r15
-	    mov 0x20(%rsp), %r14
-	    mov 0x18(%rsp), %r13
-	    mov 0x10(%rsp), %r12
-	    mov 0x08(%rsp), %rbp
+        mov 0x28(%rsp), %r15
+        mov 0x20(%rsp), %r14
+        mov 0x18(%rsp), %r13
+        mov 0x10(%rsp), %r12
+        mov 0x08(%rsp), %rbp
         mov 0x00(%rsp), %rbx
 
-	    add $0x30, %rsp
+        add $0x30, %rsp
     .cfi_def_cfa_offset 0x10
 
         popf

+ 0 - 1
build.rs

@@ -5,7 +5,6 @@ fn main() {
     let headers = [
         "include/kernel/process.hpp",
         "include/kernel/hw/pci.hpp",
-        "include/types/elf.hpp",
     ];
 
     let bindings = bindgen::Builder::default()

+ 0 - 16
include/kernel/async/lock.hpp

@@ -8,11 +8,6 @@ namespace kernel::async {
 
 using spinlock_t = unsigned long volatile;
 using lock_context_t = unsigned long;
-using preempt_count_t = ssize_t;
-
-void preempt_disable();
-void preempt_enable();
-preempt_count_t preempt_count();
 
 void init_spinlock(spinlock_t& lock);
 
@@ -38,17 +33,6 @@ class mutex {
     void unlock_irq(lock_context_t state);
 };
 
-class lock_guard {
-   private:
-    mutex& m_mtx;
-
-   public:
-    explicit inline lock_guard(mutex& mtx) : m_mtx{mtx} { m_mtx.lock(); }
-    lock_guard(const lock_guard&) = delete;
-
-    inline ~lock_guard() { m_mtx.unlock(); }
-};
-
 class lock_guard_irq {
    private:
     mutex& m_mtx;

+ 0 - 6
include/kernel/hw/pci.hpp

@@ -9,12 +9,6 @@
 
 #include <kernel/mem/phys.hpp>
 
-namespace kernel::kinit {
-
-void init_pci();
-
-} // namespace kernel::kinit
-
 namespace kernel::hw::pci {
 
 struct PACKED device_header_base {

+ 0 - 5
include/kernel/interrupt.hpp

@@ -36,8 +36,3 @@ struct interrupt_stack {
 struct mmx_registers {
     uint8_t data[512]; // TODO: list of content
 };
-
-namespace kernel::kinit {
-void init_interrupt();
-
-} // namespace kernel::kinit

+ 0 - 11
include/kernel/irq.hpp

@@ -1,11 +0,0 @@
-#pragma once
-
-#include <functional>
-
-namespace kernel::irq {
-
-using irq_handler_t = std::function<void()>;
-
-void register_handler(int irqno, irq_handler_t handler);
-
-}; // namespace kernel::irq

+ 0 - 112
include/kernel/mem/mm_list.hpp

@@ -1,112 +0,0 @@
-#pragma once
-
-#include "paging.hpp"
-#include "vm_area.hpp"
-
-#include <set>
-
-#include <stdint.h>
-
-#include <kernel/vfs/dentry.hpp>
-
-namespace kernel::mem {
-
-constexpr uintptr_t KERNEL_SPACE_START = 0x8000000000000000ULL;
-constexpr uintptr_t USER_SPACE_MEMORY_TOP = 0x0000800000000000ULL;
-constexpr uintptr_t MMAP_MIN_ADDR = 0x0000000000001000ULL;
-constexpr uintptr_t STACK_MIN_ADDR = 0x0000700000000000ULL;
-
-class mm_list {
-   private:
-    struct comparator {
-        constexpr bool operator()(const vm_area& lhs,
-                                  const vm_area& rhs) const noexcept {
-            return lhs < rhs;
-        }
-        constexpr bool operator()(const vm_area& lhs,
-                                  uintptr_t rhs) const noexcept {
-            return lhs < rhs;
-        }
-        constexpr bool operator()(uintptr_t lhs,
-                                  const vm_area& rhs) const noexcept {
-            return lhs < rhs;
-        }
-    };
-
-   public:
-    using list_type = std::set<vm_area, comparator>;
-    using iterator = list_type::iterator;
-    using const_iterator = list_type::const_iterator;
-
-    struct map_args {
-        // MUSE BE aligned to 4kb boundary
-        uintptr_t vaddr;
-        // MUSE BE aligned to 4kb boundary
-        std::size_t length;
-
-        unsigned long flags;
-
-        fs::dentry_pointer file;
-        // MUSE BE aligned to 4kb boundary
-        std::size_t file_offset;
-    };
-
-   private:
-    list_type m_areas;
-    paging::pfn_t m_pt;
-    iterator m_brk{};
-
-   public:
-    // default constructor copies kernel_mms
-    explicit mm_list();
-    // copies kernel_mms and mirrors user space
-    explicit mm_list(const mm_list& other);
-
-    constexpr mm_list(mm_list&& v)
-        : m_areas(std::move(v.m_areas))
-        , m_pt(std::exchange(v.m_pt, 0))
-        , m_brk{std::move(v.m_brk)} {}
-
-    ~mm_list();
-
-    void switch_pd() const noexcept;
-
-    int register_brk(uintptr_t addr);
-    uintptr_t set_brk(uintptr_t addr);
-
-    void clear();
-
-    // split the memory block at the specified address
-    // return: iterator to the new block
-    iterator split(iterator area, uintptr_t at);
-
-    bool is_avail(uintptr_t addr) const;
-    bool is_avail(uintptr_t start, std::size_t length) const noexcept;
-
-    uintptr_t find_avail(uintptr_t hint, size_t length) const;
-
-    int unmap(iterator area, bool should_invalidate_tlb);
-    int unmap(uintptr_t start, std::size_t length, bool should_invalidate_tlb);
-
-    int mmap(const map_args& args);
-
-    constexpr vm_area* find(uintptr_t lp) {
-        auto iter = m_areas.find(lp);
-        if (iter == m_areas.end())
-            return nullptr;
-        return &iter;
-    }
-
-    constexpr const vm_area* find(uintptr_t lp) const {
-        auto iter = m_areas.find(lp);
-        if (iter == m_areas.end())
-            return nullptr;
-        return &iter;
-    }
-
-    constexpr paging::PSE get_page_table() const noexcept {
-        return paging::PSE{m_pt};
-    }
-};
-
-} // namespace kernel::mem

+ 0 - 2
include/kernel/mem/paging_asm.h

@@ -1,4 +1,3 @@
-
 #define KERNEL_IMAGE_PADDR         0x400000
 #define KERNEL_STAGE1_PADDR        0x001000
 #define KERNEL_PML4                0x002000
@@ -7,7 +6,6 @@
 #define KERNEL_PD_KIMAGE           0x005000
 #define KERNEL_PT_KIMAGE           0x006000
 #define KERNEL_PD_STRUCT_PAGE_ARR  0x007000
-#define EMPTY_PAGE_PFN             0x008000
 
 #define KERNEL_BSS_HUGE_PAGE       0x200000
 

+ 0 - 60
include/kernel/mem/vm_area.hpp

@@ -1,60 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-namespace kernel::mem {
-
-constexpr unsigned long MM_WRITE = 0x00000000'00000001;
-constexpr unsigned long MM_EXECUTE = 0x00000000'00000002;
-constexpr unsigned long MM_MAPPED = 0x00000000'00000004;
-constexpr unsigned long MM_ANONYMOUS = 0x00000000'00000008;
-constexpr unsigned long MM_INTERNAL_MASK = 0xffffffff'00000000;
-constexpr unsigned long MM_BREAK = 0x80000000'00000000;
-
-struct vm_area {
-    uintptr_t start;
-    uintptr_t end;
-
-    unsigned long flags;
-
-    fs::dentry_pointer mapped_file;
-    std::size_t file_offset;
-
-    constexpr bool is_avail(uintptr_t ostart, uintptr_t oend) const noexcept {
-        return (ostart >= end || oend <= start);
-    }
-
-    constexpr bool operator<(const vm_area& rhs) const noexcept { return end <= rhs.start; }
-    constexpr bool operator<(uintptr_t rhs) const noexcept { return end <= rhs; }
-    friend constexpr bool operator<(uintptr_t lhs, const vm_area& rhs) noexcept {
-        return lhs < rhs.start;
-    }
-
-    constexpr vm_area(uintptr_t start, unsigned long flags, uintptr_t end,
-                      fs::dentry_pointer mapped_file = nullptr, std::size_t offset = 0)
-        : start{start}
-        , end{end}
-        , flags{flags}
-        , mapped_file{std::move(mapped_file)}
-        , file_offset{offset} {}
-
-    constexpr vm_area(uintptr_t start, unsigned long flags,
-                      fs::dentry_pointer mapped_file = nullptr, std::size_t offset = 0)
-        : start{start}
-        , end{start}
-        , flags{flags}
-        , mapped_file{std::move(mapped_file)}
-        , file_offset{offset} {}
-
-    inline vm_area(const vm_area& other)
-        : start{other.start}
-        , end{other.end}
-        , flags{other.flags}
-        , mapped_file{d_get(other.mapped_file)}
-        , file_offset{other.file_offset} {}
-};
-
-} // namespace kernel::mem

+ 0 - 13
include/kernel/process.hpp

@@ -1,11 +1,5 @@
 #pragma once
 
-#include <list>
-#include <map>
-#include <set>
-#include <tuple>
-#include <utility>
-
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -19,14 +13,7 @@
 #include <types/types.h>
 
 #include <kernel/interrupt.hpp>
-#include <kernel/mem/mm_list.hpp>
 #include <kernel/mem/paging.hpp>
 #include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-void NORETURN init_scheduler(kernel::mem::paging::pfn_t kernel_stack_pfn);
-/// @return true if returned normally, false if being interrupted
-void NORETURN schedule_noreturn(void);
 
 void NORETURN freeze(void);
-void NORETURN kill_current(int signo);

+ 0 - 60
include/kernel/vfs.hpp

@@ -5,11 +5,6 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include <types/path.hpp>
-
-#include <kernel/mem/paging.hpp>
-#include <kernel/vfs/dentry.hpp>
-
 #define NODE_MAJOR(node) (((node) >> 8) & 0xFFU)
 #define NODE_MINOR(node) ((node) & 0xFFU)
 
@@ -19,59 +14,4 @@ constexpr dev_t make_device(uint32_t major, uint32_t minor) {
     return ((major << 8) & 0xFF00U) | (minor & 0xFFU);
 }
 
-// buf, buf_size, cnt
-using chrdev_read = std::function<ssize_t(char*, std::size_t, std::size_t)>;
-
-// buf, cnt
-using chrdev_write = std::function<ssize_t(const char*, std::size_t)>;
-
-struct chrdev_ops {
-    chrdev_read read;
-    chrdev_write write;
-};
-
-int register_char_device(dev_t node, const chrdev_ops& ops);
-ssize_t char_device_read(dev_t node, char* buf, size_t buf_size, size_t n);
-ssize_t char_device_write(dev_t node, const char* buf, size_t n);
-
-class rust_file_array {
-   public:
-    struct handle;
-
-   private:
-    struct handle* m_handle;
-
-   public:
-    rust_file_array(struct handle* handle);
-    rust_file_array(const rust_file_array&) = delete;
-    ~rust_file_array();
-
-    constexpr rust_file_array(rust_file_array&& other) noexcept
-        : m_handle(std::exchange(other.m_handle, nullptr)) {}
-
-    struct handle* get() const;
-    void drop();
-};
-
-class rust_fs_context {
-   public:
-    struct handle;
-
-   private:
-    struct handle* m_handle;
-
-   public:
-    rust_fs_context(struct handle* handle);
-    rust_fs_context(const rust_fs_context&) = delete;
-    ~rust_fs_context();
-
-    constexpr rust_fs_context(rust_fs_context&& other) noexcept
-        : m_handle(std::exchange(other.m_handle, nullptr)) {}
-
-    struct handle* get() const;
-    void drop();
-};
-
-extern "C" size_t fs_read(struct dentry* file, char* buf, size_t buf_size, size_t offset, size_t n);
-
 } // namespace fs

+ 0 - 26
include/kernel/vfs/dentry.hpp

@@ -1,26 +0,0 @@
-#pragma once
-
-#include <string>
-
-#include <bits/alltypes.h>
-
-#include <types/path.hpp>
-
-#include <kernel/async/lock.hpp>
-
-struct dentry;
-
-namespace fs {
-
-struct rust_vfs_handle {
-    void* data[2];
-};
-
-struct dentry_deleter {
-    void operator()(struct dentry* dentry) const;
-};
-
-using dentry_pointer = std::unique_ptr<struct dentry, dentry_deleter>;
-dentry_pointer d_get(const dentry_pointer& dp);
-
-} // namespace fs

+ 0 - 18
include/kernel/vfs/vfsfwd.hpp

@@ -1,18 +0,0 @@
-#pragma once
-
-namespace fs {
-
-// in dentry.hpp
-struct dcache;
-struct dentry;
-
-// in filearray.hpp
-class file_array;
-
-// in inode.hpp
-struct inode;
-
-// in vfs.hpp
-class vfs;
-
-} // namespace fs

+ 0 - 295
include/types/elf.hpp

@@ -1,295 +0,0 @@
-#pragma once
-
-#include <vector>
-
-#include <stdint.h>
-
-#include <kernel/interrupt.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-namespace types::elf {
-
-using elf32_addr_t = uint32_t;
-using elf32_off_t = uint32_t;
-
-using elf64_addr_t = uint64_t;
-using elf64_off_t = uint64_t;
-
-constexpr elf32_addr_t ELF32_STACK_BOTTOM = 0xbffff000;
-constexpr elf32_off_t ELF32_STACK_SIZE = 8 * 1024 * 1024;
-constexpr elf32_addr_t ELF32_STACK_TOP = ELF32_STACK_BOTTOM - ELF32_STACK_SIZE;
-
-constexpr int ELF_LOAD_FAIL_NORETURN = 0x114514;
-
-struct PACKED elf32_header {
-    // 0x7f, "ELF"
-    char magic[4];
-
-    enum : uint8_t {
-        FORMAT_32 = 1,
-        FORMAT_64 = 2,
-    } format;
-    enum : uint8_t {
-        ENDIAN_LITTLE = 1,
-        ENDIAN_BIG = 2,
-    } endian;
-    // should be 1
-    uint8_t _version1;
-    enum : uint8_t {
-        ABI_SYSTEM_V = 0x00,
-        // TODO:
-        ABI_LINUX = 0x03,
-    } abi;
-    uint8_t abi_version;
-    uint8_t _reserved[7];
-    enum : uint16_t {
-        ET_NONE = 0x00,
-        ET_REL = 0x01,
-        ET_EXEC = 0x02,
-        ET_DYN = 0x03,
-        ET_CORE = 0x04,
-        ET_LOOS = 0xfe00,
-        ET_HIOS = 0xfeff,
-        ET_LOPROC = 0xff00,
-        ET_HIPROC = 0xffff,
-    } type;
-    enum : uint16_t {
-        ARCH_NONE = 0x00,
-        ARCH_X86 = 0x03,
-        ARCH_ARM = 0x28,
-        ARCH_IA64 = 0x32,
-        ARCH_X86_64 = 0x3e,
-        ARCH_ARM64 = 0xb7,
-        ARCH_RISCV = 0xf3,
-    } arch;
-    // should be 1
-    uint32_t _version2;
-    // entry address
-    elf32_addr_t entry;
-    // program header table offset
-    elf32_off_t phoff;
-    // section header table offset
-    elf32_off_t shoff;
-    // architecture dependent flags
-    uint32_t flags;
-    // elf header size
-    uint16_t ehsize;
-    // program header table entry size
-    uint16_t phentsize;
-    // program header table entries number
-    uint16_t phnum;
-    // section header table entry size
-    uint16_t shentsize;
-    // section header table entries number
-    uint16_t shnum;
-    // section header table entry index that contains section names
-    uint16_t shstrndx;
-};
-
-struct PACKED elf32_program_header_entry {
-    enum : uint32_t {
-        PT_NULL = 0x00,
-        PT_LOAD = 0x01,
-        PT_DYNAMIC = 0x02,
-        PT_INTERP = 0x03,
-        PT_NOTE = 0x04,
-        PT_SHLIB = 0x05,
-        PT_PHDR = 0x06,
-        PT_TLS = 0x07,
-        PT_LOOS = 0x60000000,
-        PT_HIOS = 0x6fffffff,
-        PT_LIPROC = 0x70000000,
-        PT_HIPROC = 0x7fffffff,
-    } type;
-    elf32_off_t offset;
-    elf32_addr_t vaddr;
-    elf32_addr_t paddr;
-    elf32_off_t filesz;
-    elf32_off_t memsz;
-    // segment dependent
-    enum : uint32_t {
-        PF_X = 0x1,
-        PF_W = 0x2,
-        PF_R = 0x4,
-    } flags;
-    // 0 and 1 for no alignment, otherwise power of 2
-    uint32_t align;
-};
-
-struct PACKED elf32_section_header_entry {
-    elf32_off_t sh_name;
-    enum : uint32_t {
-        SHT_NULL = 0x00,
-        SHT_PROGBITS = 0x01,
-        SHT_RELA = 0x04,
-        SHT_DYNAMIC = 0x06,
-        SHT_NOTE = 0x07,
-        SHT_NOBITS = 0x08,
-        SHT_REL = 0x09,
-        SHT_DYNSYM = 0x0b,
-        SHT_INIT_ARRAY = 0x0e,
-        SHT_FINI_ARRAY = 0x0f,
-        SHT_PREINIT_ARRAY = 0x0f,
-    } sh_type;
-    enum : uint32_t {
-        SHF_WRITE = 0x01,
-        SHF_ALLOC = 0x02,
-        SHF_EXECINSTR = 0x04,
-    } sh_flags;
-    elf32_addr_t sh_addr;
-    elf32_off_t sh_offset;
-    elf32_off_t sh_size;
-    uint32_t sh_link;
-    uint32_t sh_info;
-    elf32_off_t sh_addralign;
-    elf32_off_t sh_entsize;
-};
-
-struct elf32_load_data {
-    struct dentry* exec_dent; // Owned
-    const char* const* argv;
-    size_t argv_count;
-    const char* const* envp;
-    size_t envp_count;
-    uintptr_t ip;
-    uintptr_t sp;
-};
-
-// TODO: environment variables
-int elf32_load(elf32_load_data& data);
-
-struct PACKED elf64_header {
-    // 0x7f, "ELF"
-    char magic[4];
-
-    enum : uint8_t {
-        FORMAT_32 = 1,
-        FORMAT_64 = 2,
-    } format;
-    enum : uint8_t {
-        ENDIAN_LITTLE = 1,
-        ENDIAN_BIG = 2,
-    } endian;
-    // should be 1
-    uint8_t _version1;
-    enum : uint8_t {
-        ABI_SYSTEM_V = 0x00,
-        // TODO:
-        ABI_LINUX = 0x03,
-    } abi;
-    uint8_t abi_version;
-    uint8_t _reserved[7];
-    enum : uint16_t {
-        ET_NONE = 0x00,
-        ET_REL = 0x01,
-        ET_EXEC = 0x02,
-        ET_DYN = 0x03,
-        ET_CORE = 0x04,
-        ET_LOOS = 0xfe00,
-        ET_HIOS = 0xfeff,
-        ET_LOPROC = 0xff00,
-        ET_HIPROC = 0xffff,
-    } type;
-    enum : uint16_t {
-        ARCH_NONE = 0x00,
-        ARCH_X86 = 0x03,
-        ARCH_ARM = 0x28,
-        ARCH_IA64 = 0x32,
-        ARCH_X86_64 = 0x3e,
-        ARCH_ARM64 = 0xb7,
-        ARCH_RISCV = 0xf3,
-    } arch;
-    // should be 1
-    uint32_t _version2;
-    // entry address
-    elf64_addr_t entry;
-    // program header table offset
-    elf64_off_t phoff;
-    // section header table offset
-    elf64_off_t shoff;
-    // architecture dependent flags
-    uint32_t flags;
-    // elf header size
-    uint16_t ehsize;
-    // program header table entry size
-    uint16_t phentsize;
-    // program header table entries number
-    uint16_t phnum;
-    // section header table entry size
-    uint16_t shentsize;
-    // section header table entries number
-    uint16_t shnum;
-    // section header table entry index that contains section names
-    uint16_t shstrndx;
-};
-
-struct PACKED elf64_program_header_entry {
-    enum : uint32_t {
-        PT_NULL = 0x00,
-        PT_LOAD = 0x01,
-        PT_DYNAMIC = 0x02,
-        PT_INTERP = 0x03,
-        PT_NOTE = 0x04,
-        PT_SHLIB = 0x05,
-        PT_PHDR = 0x06,
-        PT_TLS = 0x07,
-        PT_LOOS = 0x60000000,
-        PT_HIOS = 0x6fffffff,
-        PT_LIPROC = 0x70000000,
-        PT_HIPROC = 0x7fffffff,
-    } type;
-    // segment dependent
-    enum : uint32_t {
-        PF_X = 0x1,
-        PF_W = 0x2,
-        PF_R = 0x4,
-    } flags;
-    elf64_off_t offset;
-    elf64_addr_t vaddr;
-    elf64_addr_t paddr;
-    elf64_off_t filesz;
-    elf64_off_t memsz;
-    // 0 and 1 for no alignment, otherwise power of 2
-    uint64_t align;
-};
-
-struct PACKED elf64_section_header_entry {
-    uint32_t sh_name;
-    enum : uint32_t {
-        SHT_NULL = 0x00,
-        SHT_PROGBITS = 0x01,
-        SHT_RELA = 0x04,
-        SHT_DYNAMIC = 0x06,
-        SHT_NOTE = 0x07,
-        SHT_NOBITS = 0x08,
-        SHT_REL = 0x09,
-        SHT_DYNSYM = 0x0b,
-        SHT_INIT_ARRAY = 0x0e,
-        SHT_FINI_ARRAY = 0x0f,
-        SHT_PREINIT_ARRAY = 0x0f,
-    } sh_type;
-    enum : uint64_t {
-        SHF_WRITE = 0x01,
-        SHF_ALLOC = 0x02,
-        SHF_EXECINSTR = 0x04,
-    } sh_flags;
-    elf64_addr_t sh_addr;
-    elf64_off_t sh_offset;
-    elf64_off_t sh_size;
-    uint32_t sh_link;
-    uint32_t sh_info;
-    elf64_off_t sh_addralign;
-    elf64_off_t sh_entsize;
-};
-
-struct elf64_load_data {
-    fs::dentry_pointer exec_dent;
-    std::vector<std::string> argv;
-    std::vector<std::string> envp;
-    unsigned long ip;
-    unsigned long sp;
-};
-
-} // namespace types::elf

+ 1 - 1
src/asm/interrupt.s

@@ -33,8 +33,8 @@
 	.cfi_restore \reg
 .endm
 
-.extern after_ctx_switch
 .globl ISR_stub_restore
+.type ISR_stub_restore @function
 
 ISR_stub:
 	.cfi_startproc

+ 2 - 1
src/boot.s

@@ -295,7 +295,8 @@ fill_pxe:
 
 .section .text
 start_64bit:
-    # set stack pointer and clear stack bottom
+    # We map the first 1GB identically to the first 1GB of physical memory,
+    # move sp to the correct position in identically mapped area of kernel space.
     mov %rsp, %rdi
     xor %rsp, %rsp
     inc %rsp

+ 4 - 4
src/driver.rs

@@ -4,20 +4,20 @@ pub mod serial;
 pub mod timer;
 
 // TODO!!!: Put it somewhere else.
-pub(self) struct Port8 {
+pub struct Port8 {
     no: u16,
 }
 
 impl Port8 {
-    const fn new(no: u16) -> Self {
+    pub const fn new(no: u16) -> Self {
         Self { no }
     }
 
-    fn read(&self) -> u8 {
+    pub fn read(&self) -> u8 {
         arch::io::inb(self.no)
     }
 
-    fn write(&self, data: u8) {
+    pub fn write(&self, data: u8) {
         arch::io::outb(self.no, data)
     }
 }

+ 2 - 0
src/driver/timer.rs

@@ -4,6 +4,7 @@ const COUNT: Port8 = Port8::new(0x40);
 const CONTROL: Port8 = Port8::new(0x43);
 
 pub fn init() {
+    arch::interrupt::disable();
     // Set interval
     CONTROL.write(0x34);
 
@@ -11,4 +12,5 @@ pub fn init() {
     // 0x2e9a = 11930 = 100Hz
     COUNT.write(0x9a);
     COUNT.write(0x2e);
+    arch::interrupt::enable();
 }

+ 370 - 0
src/elf.rs

@@ -0,0 +1,370 @@
+use alloc::{ffi::CString, sync::Arc};
+use bitflags::bitflags;
+
+use crate::{
+    io::{RawBuffer, UninitBuffer},
+    kernel::{
+        constants::ENOEXEC,
+        mem::{FileMapping, MMList, Mapping, Permission, VAddr},
+        task::Thread,
+        user::{dataflow::CheckedUserPointer, UserPointerMut},
+        vfs::dentry::Dentry,
+    },
+    prelude::*,
+};
+
+#[repr(u8)] // one byte wide, matching the single-byte `format` field of `Elf32Header`
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfFormat { // File class stored in the ELF identification bytes.
+    Elf32 = 1, // 32-bit object file
+    Elf64 = 2, // 64-bit object file
+}
+
+#[repr(u8)] // one byte wide, matching the single-byte `endian` field of `Elf32Header`
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfEndian { // Byte order recorded in the ELF identification bytes.
+    Little = 1, // little-endian data encoding
+    Big = 2, // big-endian data encoding
+}
+
+/// Target OS ABI byte (`EI_OSABI`) from the ELF identification bytes.
+///
+/// `0` (System V / "none") is what most toolchains emit, including for Linux
+/// binaries, so it has to be a representable value: `Elf32Header` is filled
+/// directly from file bytes, and a byte that matches no variant listed here
+/// would be an invalid enum value.
+#[repr(u8)]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfABI {
+    SystemV = 0,
+    Linux = 3,
+}
+
+#[repr(u16)] // two bytes wide, matching the `elf_type` field of `Elf32Header`
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfType { // Object file type recorded in the ELF header.
+    Relocatable = 1, // relocatable object
+    Executable = 2, // executable file
+    Dynamic = 3, // shared object
+    Core = 4, // core dump
+}
+
+#[repr(u16)] // two bytes wide, matching the `arch` field of `Elf32Header`
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfArch { // Machine architecture identifier recorded in the ELF header.
+    X86 = 0x03, // Intel 80386
+    Arm = 0x28, // 32-bit ARM
+    IA64 = 0x32, // Intel Itanium
+    X86_64 = 0x3e, // AMD64 / x86-64
+    AArch64 = 0xb7, // 64-bit ARM
+    RiscV = 0xf3, // RISC-V
+}
+
+bitflags! {
+    #[derive(Default, Clone, Copy)]
+    pub struct Elf32PhFlags: u32 { // Permission bits of a program header entry (`Elf32PhEntry::flags`).
+        const Exec = 1; // segment is executable
+        const Write = 2; // segment is writable
+        const Read = 4; // segment is readable
+    }
+
+    #[derive(Default, Clone, Copy)]
+    pub struct Elf32ShFlags: u32 { // Attribute bits of a section header entry (`Elf32ShEntry::flags`).
+        const Write = 1; // section is writable
+        const Alloc = 2; // section occupies memory
+        const Exec = 4; // section holds executable instructions
+        const MaskProc = 0xf0000000; // mask covering the processor-specific bits
+    }
+}
+
+/// Segment type (`p_type`) of a 32-bit program header entry.
+///
+/// `#[repr(u32)]` pins the discriminant to the 4-byte on-disk field. Without it
+/// the enum's size is left to the compiler, while `Elf32PhEntry` is `#[repr(C)]`,
+/// filled from raw file bytes and size-checked against `ph_entry_size`.
+///
+/// NOTE(review): entries are read straight from the file, so a `p_type` that is
+/// not listed here (e.g. an OS-specific value between `Loos` and `Hios`) still
+/// yields an invalid enum value; consider storing a raw `u32` instead.
+#[repr(u32)]
+#[derive(Default, Clone, Copy, PartialEq, Eq)]
+pub enum Elf32PhType {
+    #[default]
+    Null = 0,
+    Load = 1,
+    Dynamic = 2,
+    Interp = 3,
+    Note = 4,
+    Shlib = 5,
+    Phdr = 6,
+    Tls = 7,
+    Loos = 0x60000000,
+    Hios = 0x6fffffff,
+    Loproc = 0x70000000,
+    Hiproc = 0x7fffffff,
+}
+
+/// Section type (`sh_type`) of a 32-bit section header entry.
+///
+/// `#[repr(u32)]` pins the discriminant to the 4-byte on-disk field. Without it
+/// the enum's size is left to the compiler, while `Elf32ShEntry` is `#[repr(C)]`,
+/// filled from raw file bytes and size-checked against `sh_entry_size`.
+///
+/// NOTE(review): entries are read straight from the file, so an `sh_type` that
+/// is not listed here still yields an invalid enum value; consider storing a
+/// raw `u32` instead.
+#[repr(u32)]
+#[derive(Default, Clone, Copy, PartialEq, Eq)]
+pub enum Elf32ShType {
+    #[default]
+    Null = 0,
+    ProgBits = 1,
+    SymTab = 2,
+    StrTab = 3,
+    Rela = 4,
+    Hash = 5,
+    Dynamic = 6,
+    Note = 7,
+    NoBits = 8,
+    Rel = 9,
+    Shlib = 10,
+    DynSym = 11,
+    InitArray = 14,
+    FiniArray = 15,
+    PreInitArray = 16,
+    Group = 17,
+    SymTabShndx = 18,
+    Loos = 0x60000000,
+    Hios = 0x6fffffff,
+    Loproc = 0x70000000,
+    Hiproc = 0x7fffffff,
+}
+
+#[repr(C, packed)] // packed: no padding is inserted between fields
+#[derive(Clone, Copy)]
+pub struct Elf32Header { // ELF32 file header; `ParsedElf32::parse` reads it from file offset 0.
+    /// ELF magic number: 0x7f, "ELF"
+    pub magic: [u8; 4],
+    pub format: ElfFormat,
+    pub endian: ElfEndian,
+    /// ELF version, should be 1
+    pub version: u8,
+    pub abi: ElfABI,
+    pub abi_version: u8,
+    padding: [u8; 7], // unused filler bytes before `elf_type`
+    pub elf_type: ElfType,
+    pub arch: ElfArch,
+    /// ELF version, should be 1
+    pub version2: u32,
+    pub entry: u32, // entry point; `ParsedElf32::load` returns it as the initial instruction pointer
+    pub ph_offset: u32, // file offset the program header entries are read from
+    pub sh_offset: u32, // file offset the section header entries are read from
+    pub flags: u32,
+    pub eh_size: u16, // must equal `size_of::<Elf32Header>()` (see `check_valid`)
+    pub ph_entry_size: u16, // must equal `size_of::<Elf32PhEntry>()` (see `check_valid`)
+    pub ph_entry_count: u16, // number of program header entries
+    pub sh_entry_size: u16, // must equal `size_of::<Elf32ShEntry>()` (see `check_valid`)
+    pub sh_entry_count: u16, // number of section header entries
+    pub sh_str_index: u16,
+}
+
+#[repr(C)]
+#[derive(Default, Clone, Copy)]
+pub struct Elf32PhEntry { // One program header entry; `ParsedElf32::parse` fills these from raw file bytes.
+    pub ph_type: Elf32PhType, // only `Load` entries are mapped by `ParsedElf32::load`
+    pub offset: u32, // file offset of the segment data
+    pub vaddr: u32, // virtual address the segment is loaded at
+    pub paddr: u32,
+    pub file_size: u32, // bytes of the segment backed by the file
+    pub mem_size: u32, // bytes the segment occupies in memory
+    pub flags: Elf32PhFlags,
+    /// `0` and `1` for no alignment, otherwise power of `2`
+    pub align: u32,
+}
+
+#[repr(C)]
+#[derive(Default, Clone, Copy)]
+pub struct Elf32ShEntry { // One section header entry; `ParsedElf32::parse` fills these from raw file bytes.
+    pub name_offset: u32, // presumably an offset into the section name string table — TODO confirm
+    pub sh_type: Elf32ShType,
+    pub flags: Elf32ShFlags,
+    pub addr: u32,
+    pub offset: u32,
+    pub size: u32,
+    pub link: u32,
+    pub info: u32,
+    pub addr_align: u32,
+    pub entry_size: u32,
+}
+
+pub struct ParsedElf32 { // Result of `ParsedElf32::parse`: what `load` needs to map the program.
+    entry: u32, // entry point copied from the header
+    file: Arc<Dentry>, // the executable itself; cloned into each file-backed mapping by `load`
+    phents: Vec<Elf32PhEntry>, // every program header entry, in file order
+    shents: Vec<Elf32ShEntry>, // every section header entry, in file order
+}
+
+const ELF_MAGIC: [u8; 4] = *b"\x7fELF";
+
+impl Elf32Header {
+    /// Sanity-check the header: the magic bytes, both version fields and the
+    /// three recorded structure sizes must match what this module expects.
+    fn check_valid(&self) -> bool {
+        if self.magic != ELF_MAGIC {
+            return false;
+        }
+        if self.version != 1 || self.version2 != 1 {
+            return false;
+        }
+
+        // Each size recorded in the file has to agree with the in-memory layout.
+        let sizes_match = |recorded: u16, expected: usize| recorded as usize == expected;
+        sizes_match(self.eh_size, size_of::<Elf32Header>())
+            && sizes_match(self.ph_entry_size, size_of::<Elf32PhEntry>())
+            && sizes_match(self.sh_entry_size, size_of::<Elf32ShEntry>())
+    }
+}
+
+impl ParsedElf32 {
+    pub fn parse(file: Arc<Dentry>) -> KResult<Self> { // Read and validate the ELF32 header, then read both header tables; `ENOEXEC` on any mismatch.
+        let mut header = UninitBuffer::<Elf32Header>::new();
+        file.read(&mut header, 0)?; // the header is read from file offset 0
+
+        let header = header.assume_init().ok_or(ENOEXEC)?; // `None` means the buffer was not completely filled
+        if !header.check_valid() {
+            return Err(ENOEXEC);
+        }
+
+        // TODO: Use `UninitBuffer` for `phents` and `shents`.
+        let mut phents = vec![Elf32PhEntry::default(); header.ph_entry_count as usize];
+        let nread = file.read(
+            &mut RawBuffer::new_from_slice(phents.as_mut_slice()),
+            header.ph_offset as usize,
+        )?;
+        if nread != header.ph_entry_count as usize * size_of::<Elf32PhEntry>() { // a short read means the table is incomplete
+            return Err(ENOEXEC);
+        }
+
+        let mut shents = vec![Elf32ShEntry::default(); header.sh_entry_count as usize];
+        let nread = file.read(
+            &mut RawBuffer::new_from_slice(shents.as_mut_slice()),
+            header.sh_offset as usize,
+        )?;
+        if nread != header.sh_entry_count as usize * size_of::<Elf32ShEntry>() { // a short read means the table is incomplete
+            return Err(ENOEXEC);
+        }
+
+        Ok(Self {
+            entry: header.entry,
+            file,
+            phents,
+            shents,
+        })
+    }
+
+    /// Load the ELF file into memory. Return the entry point address.
+    ///
+    /// We clear the user space and load the program headers into memory.
+    /// There is no way back from here: the old user mappings are cleared first, so a later failure cannot restore them.
+    ///
+    /// # Return
+    /// `(entry_ip, sp)`
+    pub fn load(
+        self,
+        mm_list: &MMList,
+        args: Vec<CString>,
+        envs: Vec<CString>,
+    ) -> KResult<(VAddr, VAddr)> {
+        mm_list.clear_user();
+
+        let mut data_segment_end = VAddr(0); // highest end address seen over all loaded segments
+        for phent in self
+            .phents
+            .into_iter()
+            .filter(|ent| ent.ph_type == Elf32PhType::Load)
+        {
+            let vaddr_start = VAddr(phent.vaddr as usize);
+            let vmem_vaddr_end = vaddr_start + phent.mem_size as usize;
+            let load_vaddr_end = vaddr_start + phent.file_size as usize;
+
+            let vaddr = vaddr_start.floor(); // presumably rounds down to a page boundary — TODO confirm `VAddr::floor`
+            let vmem_len = vmem_vaddr_end.ceil() - vaddr;
+            let file_len = load_vaddr_end.ceil() - vaddr;
+            let file_offset = phent.offset as usize & !0xfff; // round down to 4 KiB; `FileMapping::new` asserts this alignment
+
+            let permission = Permission {
+                write: phent.flags.contains(Elf32PhFlags::Write),
+                execute: phent.flags.contains(Elf32PhFlags::Exec),
+            };
+
+            if file_len != 0 {
+                let real_file_length = load_vaddr_end - vaddr; // bytes really backed by the file, counted from the mapping start
+                mm_list.mmap_fixed(
+                    vaddr,
+                    file_len,
+                    Mapping::File(FileMapping::new(
+                        self.file.clone(),
+                        file_offset,
+                        real_file_length,
+                    )),
+                    permission,
+                )?;
+            }
+
+            if vmem_len > file_len { // whatever lies beyond the file-backed pages becomes anonymous memory
+                mm_list.mmap_fixed(
+                    vaddr + file_len,
+                    vmem_len - file_len,
+                    Mapping::Anonymous,
+                    permission,
+                )?;
+            }
+
+            if vaddr + vmem_len > data_segment_end {
+                data_segment_end = vaddr + vmem_len;
+            }
+        }
+
+        mm_list.register_break(data_segment_end + 0x10000); // the break is registered 0x10000 bytes past the last segment
+
+        // Map stack area
+        mm_list.mmap_fixed(
+            VAddr(0xc0000000 - 0x800000), // Stack bottom is at 0xc0000000
+            0x800000,                     // 8MB stack size
+            Mapping::Anonymous,
+            Permission {
+                write: true,
+                execute: false,
+            },
+        )?;
+
+        // TODO!!!!!: A temporary workaround.
+        mm_list.switch_page_table(); // NOTE(review): a `?` return or panic before the matching switch below would skip it — confirm callers cope
+
+        let mut sp = 0xc0000000u32;
+        let arg_addrs = args
+            .into_iter()
+            .map(|arg| push_string(&mut sp, arg)) // strings are copied first; each call moves `sp` further down
+            .collect::<Vec<_>>();
+
+        let env_addrs = envs
+            .into_iter()
+            .map(|env| push_string(&mut sp, env))
+            .collect::<Vec<_>>();
+
+        let longs = 2 // Null auxiliary vector entry
+            + env_addrs.len() + 1 // Envs + null
+            + arg_addrs.len() + 1 // Args + null
+            + 1; // argc
+
+        sp -= longs as u32 * 4; // reserve one 32-bit slot per entry counted above
+        sp &= !0xf; // Align to 16 bytes
+
+        let mut cursor = (0..longs) // yields one user pointer per reserved slot, lowest address first
+            .map(|idx| UserPointerMut::<u32>::new_vaddr(sp as usize + size_of::<u32>() * idx));
+
+        // argc
+        cursor.next().unwrap()?.write(arg_addrs.len() as u32)?;
+
+        // args
+        for arg_addr in arg_addrs.into_iter() {
+            cursor.next().unwrap()?.write(arg_addr)?;
+        }
+        cursor.next().unwrap()?.write(0)?; // null
+
+        // envs
+        for env_addr in env_addrs.into_iter() {
+            cursor.next().unwrap()?.write(env_addr)?;
+        }
+        cursor.next().unwrap()?.write(0)?; // null
+
+        // Null auxiliary vector
+        cursor.next().unwrap()?.write(0)?; // AT_NULL
+        cursor.next().unwrap()?.write(0)?; // AT_NULL
+
+        // TODO!!!!!: A temporary workaround.
+        Thread::current().process.mm_list.switch_page_table(); // switch to the current thread's process page table again
+
+        assert!(cursor.next().is_none()); // every reserved slot must have been written exactly once
+        Ok((VAddr(self.entry as usize), VAddr(sp as usize)))
+    }
+}
+
+/// Copy `string`, including its trailing NUL, to user memory just below `*sp`.
+///
+/// The destination is rounded down to a 4-byte boundary. `*sp` is updated to
+/// that address, which is also returned. Panics if the user pointer cannot be
+/// created or the copy fails.
+fn push_string(sp: &mut u32, string: CString) -> u32 {
+    let bytes = string.as_bytes_with_nul();
+    let len = bytes.len();
+
+    // Make room below the current stack pointer, then align down to 4 bytes.
+    let target = (*sp - len as u32) & !0x3;
+
+    CheckedUserPointer::new(target as *const u8, len)
+        .unwrap()
+        .write(bytes.as_ptr() as _, len)
+        .unwrap();
+
+    *sp = target;
+    target
+}

+ 1 - 1
src/fs/procfs.rs

@@ -69,7 +69,7 @@ impl ProcFsNode {
 }
 
 define_struct_inode! {
-    struct FileInode {
+    pub struct FileInode {
         file: Box<dyn ProcFsFile>,
     }
 }

+ 8 - 0
src/io.rs

@@ -82,6 +82,14 @@ impl<'lt, T: Copy + Sized> UninitBuffer<'lt, T> {
 
         Ok(unsafe { self.data.assume_init_ref() })
     }
+
+    /// Consume the buffer and return its value, or `None` when the buffer was
+    /// not completely filled.
+    pub fn assume_init(self) -> Option<T> {
+        if !self.buffer.filled() {
+            return None;
+        }
+
+        // The buffer reports itself as filled, so the storage is read out as a `T`.
+        Some(unsafe { *self.data.assume_init() })
+    }
 }
 
 impl<'lt, T: Copy + Sized> Buffer for UninitBuffer<'lt, T> {

+ 33 - 28
src/kernel.ld

@@ -33,6 +33,7 @@ SECTIONS
     .text :
         AT(LOADADDR(.stage1) + SIZEOF(.stage1))
     {
+        KIMAGE_START = .;
         TEXT_START = .;
         *(.text)
         *(.text*)
@@ -57,15 +58,19 @@ SECTIONS
         end_ctors = .;
 
         . = ALIGN(16);
-        FIX_START = .;
+        _fix_start = .;
         KEEP(*(.fix));
-        FIX_END = .;
+        _fix_end = .;
 
         . = ALIGN(16);
         BSS_ADDR = .;
         QUAD(ABSOLUTE(BSS_START));
         BSS_LENGTH = .;
         QUAD(BSS_END - BSS_START);
+        FIX_START = .;
+        QUAD(ABSOLUTE(_fix_start));
+        FIX_END = .;
+        QUAD(ABSOLUTE(_fix_end));
 
         . = ALIGN(0x1000);
         RODATA_END = .;
@@ -113,42 +118,42 @@ SECTIONS
     } > KIMAGE
 
     /* Stabs debugging sections.  */
-    .stab          0 : { *(.stab) }
-    .stabstr       0 : { *(.stabstr) }
-    .stab.excl     0 : { *(.stab.excl) }
-    .stab.exclstr  0 : { *(.stab.exclstr) }
-    .stab.index    0 : { *(.stab.index) }
-    .stab.indexstr 0 : { *(.stab.indexstr) }
-    .comment       0 : { *(.comment) }
+    .stab          0 : { KEEP(*(.stab)); }
+    .stabstr       0 : { KEEP(*(.stabstr)); }
+    .stab.excl     0 : { KEEP(*(.stab.excl)); }
+    .stab.exclstr  0 : { KEEP(*(.stab.exclstr)); }
+    .stab.index    0 : { KEEP(*(.stab.index)); }
+    .stab.indexstr 0 : { KEEP(*(.stab.indexstr)); }
+    .comment       0 : { KEEP(*(.comment)); }
     /* DWARF debug sections.
        Symbols in the DWARF debugging sections are relative to the beginning
        of the section so we begin them at 0.  */
     /* DWARF 1 */
-    .debug          0 : { *(.debug) }
-    .line           0 : { *(.line) }
+    .debug          0 : { KEEP(*(.debug)); }
+    .line           0 : { KEEP(*(.line)); }
     /* GNU DWARF 1 extensions */
-    .debug_srcinfo  0 : { *(.debug_srcinfo) }
-    .debug_sfnames  0 : { *(.debug_sfnames) }
+    .debug_srcinfo  0 : { KEEP(*(.debug_srcinfo)); }
+    .debug_sfnames  0 : { KEEP(*(.debug_sfnames)); }
     /* DWARF 1.1 and DWARF 2 */
-    .debug_aranges  0 : { *(.debug_aranges) }
-    .debug_pubnames 0 : { *(.debug_pubnames) }
+    .debug_aranges  0 : { KEEP(*(.debug_aranges)); }
+    .debug_pubnames 0 : { KEEP(*(.debug_pubnames)); }
     /* DWARF 2 */
-    .debug_info     0 : { *(.debug_info) }
-    .debug_abbrev   0 : { *(.debug_abbrev) }
-    .debug_line     0 : { *(.debug_line) }
-    .debug_frame    0 : { *(.debug_frame) }
-    .debug_str      0 : { *(.debug_str) }
-    .debug_loc      0 : { *(.debug_loc) }
-    .debug_macinfo  0 : { *(.debug_macinfo) }
+    .debug_info     0 : { KEEP(*(.debug_info)); }
+    .debug_abbrev   0 : { KEEP(*(.debug_abbrev)); }
+    .debug_line     0 : { KEEP(*(.debug_line)); }
+    .debug_frame    0 : { KEEP(*(.debug_frame)); }
+    .debug_str      0 : { KEEP(*(.debug_str)); }
+    .debug_loc      0 : { KEEP(*(.debug_loc)); }
+    .debug_macinfo  0 : { KEEP(*(.debug_macinfo)); }
     /* SGI/MIPS DWARF 2 extensions */
-    .debug_weaknames 0 : { *(.debug_weaknames) }
-    .debug_funcnames 0 : { *(.debug_funcnames) }
-    .debug_typenames 0 : { *(.debug_typenames) }
-    .debug_varnames  0 : { *(.debug_varnames) }
+    .debug_weaknames 0 : { KEEP(*(.debug_weaknames)); }
+    .debug_funcnames 0 : { KEEP(*(.debug_funcnames)); }
+    .debug_typenames 0 : { KEEP(*(.debug_typenames)); }
+    .debug_varnames  0 : { KEEP(*(.debug_varnames)); }
 
     /* DWARF Other */
-    .debug_ranges  0 : { *(.debug_ranges) }
-    .debug_line_str 0 : { *(.debug_line_str) }
+    .debug_ranges  0 : { KEEP(*(.debug_ranges)); }
+    .debug_line_str 0 : { KEEP(*(.debug_line_str)); }
     /* Rust stuff */
 
     /DISCARD/ :

+ 1 - 1
src/kernel.rs

@@ -1,5 +1,6 @@
 pub mod block;
 pub mod console;
+pub mod constants;
 pub mod interrupt;
 pub mod mem;
 pub mod syscall;
@@ -9,7 +10,6 @@ pub mod user;
 pub mod vfs;
 
 mod chardev;
-mod constants;
 mod terminal;
 
 pub use chardev::{CharDevice, CharDeviceType, VirtualCharDevice};

+ 7 - 29
src/kernel/async/lock.cc

@@ -1,5 +1,4 @@
 #include <assert.h>
-#include <stdint.h>
 
 #include <kernel/async/lock.hpp>
 
@@ -49,33 +48,20 @@ static inline void _restore_interrupt_state(lock_context_t context) {
         :);
 }
 
-// TODO: mark as _per_cpu
-static inline preempt_count_t& _preempt_count() {
-    static preempt_count_t _preempt_count;
-    assert(_preempt_count >= 0);
-    return _preempt_count;
-}
+extern "C" void r_preempt_disable();
+extern "C" void r_preempt_enable();
+extern "C" unsigned long r_preempt_count();
 
 void preempt_disable() {
-    ++_preempt_count();
-    asm volatile("" : : : "memory");
+    r_preempt_disable();
 }
 
 void preempt_enable() {
-    asm volatile("" : : : "memory");
-    --_preempt_count();
-}
-
-extern "C" void r_preempt_disable() {
-    preempt_disable();
+    r_preempt_enable();
 }
 
-extern "C" void r_preempt_enable() {
-    preempt_enable();
-}
-
-preempt_count_t preempt_count() {
-    return _preempt_count();
+unsigned long preempt_count() {
+    return r_preempt_count();
 }
 
 void spin_lock(spinlock_t& lock) {
@@ -107,14 +93,6 @@ mutex::~mutex() {
     assert(m_lock == 0);
 }
 
-void mutex::lock() {
-    spin_lock(m_lock);
-}
-
-void mutex::unlock() {
-    spin_unlock(m_lock);
-}
-
 lock_context_t mutex::lock_irq() {
     return spin_lock_irqsave(m_lock);
 }

+ 3 - 0
src/kernel/console.rs

@@ -79,6 +79,9 @@ macro_rules! println_info {
 }
 
 macro_rules! println_fatal {
+    () => {
+        $crate::println!("[kernel:fatal] ")
+    };
     ($($arg:tt)*) => {
         $crate::println!("[kernel:fatal] {}", format_args!($($arg)*))
     };

+ 2 - 0
src/kernel/constants.rs

@@ -16,6 +16,8 @@ pub const SIG_SETMASK: u32 = 2;
 pub const CLOCK_REALTIME: u32 = 0;
 pub const CLOCK_MONOTONIC: u32 = 1;
 
+pub const ENOEXEC: u32 = 8;
+
 bitflags! {
     pub struct UserMmapFlags: u32 {
         const MAP_SHARED = 0x01;

+ 2 - 1
src/kernel/hw/pci.cc

@@ -87,10 +87,11 @@ int register_driver_r(uint16_t vendor, uint16_t device,
 
 namespace kernel::kinit {
 
-void init_pci() {
+extern "C" void init_pci() {
     using namespace hw::acpi;
     using namespace hw::pci;
 
+    assert(parse_acpi_tables() == 0);
     auto* mcfg = (MCFG*)get_table("MCFG");
     assert(mcfg);
 

+ 0 - 142
src/kernel/interrupt.cpp

@@ -1,142 +0,0 @@
-#include "kernel/async/lock.hpp"
-
-#include <list>
-#include <vector>
-
-#include <assert.h>
-#include <stdint.h>
-
-#include <types/types.h>
-
-#include <kernel/hw/port.hpp>
-#include <kernel/interrupt.hpp>
-#include <kernel/irq.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-
-#define KERNEL_INTERRUPT_GATE_TYPE (0x8e)
-#define USER_INTERRUPT_GATE_TYPE (0xee)
-
-constexpr kernel::hw::p8 port_pic1_command{0x20};
-constexpr kernel::hw::p8 port_pic1_data{0x21};
-constexpr kernel::hw::p8 port_pic2_command{0xa0};
-constexpr kernel::hw::p8 port_pic2_data{0xa1};
-
-struct IDT_entry {
-    uint16_t offset_low;
-    uint16_t segment;
-
-    uint8_t IST;
-    uint8_t attributes;
-
-    uint16_t offset_mid;
-    uint32_t offset_high;
-    uint32_t reserved;
-};
-
-static struct IDT_entry IDT[256];
-
-extern "C" uintptr_t ISR_START_ADDR;
-
-static inline void set_idt_entry(IDT_entry (&idt)[256], int n, uintptr_t offset, uint16_t selector,
-                                 uint8_t type) {
-    idt[n].offset_low = offset & 0xffff;
-    idt[n].segment = selector;
-    idt[n].IST = 0;
-    idt[n].attributes = type;
-    idt[n].offset_mid = (offset >> 16) & 0xffff;
-    idt[n].offset_high = (offset >> 32) & 0xffffffff;
-    idt[n].reserved = 0;
-}
-
-using kernel::irq::irq_handler_t;
-static std::vector<std::list<irq_handler_t>> s_irq_handlers;
-
-void kernel::kinit::init_interrupt() {
-    for (int i = 0; i < 0x30; ++i)
-        set_idt_entry(IDT, i, ISR_START_ADDR + 8 * i, 0x08, KERNEL_INTERRUPT_GATE_TYPE);
-    set_idt_entry(IDT, 0x80, ISR_START_ADDR + 8 * 0x80, 0x08, USER_INTERRUPT_GATE_TYPE);
-
-    uint64_t idt_descriptor[2];
-    idt_descriptor[0] = (sizeof(IDT_entry) * 256) << 48;
-    idt_descriptor[1] = (uintptr_t)IDT;
-
-    // initialize PIC
-    asm volatile("lidt (%0)" : : "r"((uintptr_t)idt_descriptor + 6) :);
-    s_irq_handlers.resize(16);
-
-    // TODO: move this to timer driver
-    kernel::irq::register_handler(0, []() {
-        kernel::hw::timer::inc_tick();
-        if (async::preempt_count() == 0)
-            schedule_now();
-    });
-
-    port_pic1_command = 0x11; // edge trigger mode
-    port_pic1_data = 0x20;    // start from int 0x20
-    port_pic1_data = 0x04;    // PIC1 is connected to IRQ2 (1 << 2)
-    port_pic1_data = 0x01;    // no buffer mode
-
-    port_pic2_command = 0x11; // edge trigger mode
-    port_pic2_data = 0x28;    // start from int 0x28
-    port_pic2_data = 0x02;    // connected to IRQ2
-    port_pic2_data = 0x01;    // no buffer mode
-
-    // allow all the interrupts
-    port_pic1_data = 0x00;
-    port_pic2_data = 0x00;
-}
-
-void kernel::irq::register_handler(int irqno, irq_handler_t handler) {
-    s_irq_handlers[irqno].emplace_back(std::move(handler));
-}
-
-static inline void fault_handler(interrupt_stack* context, mmx_registers*) {
-    switch (context->int_no) {
-        case 6:
-        case 8: {
-            assert(false);
-            if (!current_process->attr.system)
-                kill_current(SIGSEGV); // noreturn
-        } break;
-        case 13: {
-            if (!current_process->attr.system)
-                kill_current(SIGILL); // noreturn
-        } break;
-        case 14: {
-            kernel::mem::paging::handle_page_fault(context);
-            return;
-        } break;
-    }
-
-    // fault can not be resolved
-    freeze();
-}
-
-extern "C" void irq_handler_rust(int irqno);
-
-static inline void irq_handler(interrupt_stack* context, mmx_registers*) {
-    int irqno = context->int_no - 0x20;
-
-    constexpr uint8_t PIC_EOI = 0x20;
-
-    for (const auto& handler : s_irq_handlers[irqno])
-        handler();
-
-    irq_handler_rust(irqno);
-
-    port_pic1_command = PIC_EOI;
-    if (irqno >= 8)
-        port_pic2_command = PIC_EOI;
-}
-
-extern "C" void interrupt_handler(interrupt_stack* context, mmx_registers* mmxregs) {
-    if (context->int_no < 0x20) // interrupt is a fault
-        fault_handler(context, mmxregs);
-    else if (context->int_no == 0x80) // syscall by int 0x80
-        kernel::handle_syscall32(context->regs.rax, context, mmxregs);
-    else
-        irq_handler(context, mmxregs);
-}

+ 118 - 6
src/kernel/interrupt.rs

@@ -4,23 +4,111 @@ use alloc::vec::Vec;
 
 use lazy_static::lazy_static;
 
-use crate::bindings::root::EINVAL;
-use crate::Spin;
+use crate::bindings::root::{interrupt_stack, mmx_registers, EINVAL};
+use crate::{driver::Port8, prelude::*};
+
+use super::mem::handle_page_fault;
+use super::syscall::handle_syscall32;
+use super::task::{ProcessList, Signal};
+
+const PIC1_COMMAND: Port8 = Port8::new(0x20);
+const PIC1_DATA: Port8 = Port8::new(0x21);
+const PIC2_COMMAND: Port8 = Port8::new(0xA0);
+const PIC2_DATA: Port8 = Port8::new(0xA1);
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct IDTEntry { // One interrupt descriptor table entry; the table holds 256 of them.
+    offset_low: u16, // handler address bits 0..16
+    selector: u16, // code segment selector (0x08 for every installed gate)
+
+    interrupt_stack: u8, // always written as 0 by the constructors
+    attributes: u8, // gate type byte: 0x8e for ordinary vectors, 0xee for vector 0x80
+
+    offset_mid: u16, // handler address bits 16..32
+    offset_high: u32, // handler address bits 32..64
+    reserved: u32, // always written as 0
+}
+
+extern "C" {
+    static ISR_START_ADDR: usize;
+}
 
 lazy_static! {
     static ref IRQ_HANDLERS: Spin<[Vec<Box<dyn Fn() + Send>>; 16]> =
         Spin::new(core::array::from_fn(|_| vec![]));
+    static ref IDT: [IDTEntry; 256] = core::array::from_fn(|idx| {
+        match idx {
+            0..0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0x8e),
+            0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0xee),
+            _ => IDTEntry::null(),
+        }
+    });
 }
 
-#[no_mangle]
-pub extern "C" fn irq_handler_rust(irqno: core::ffi::c_int) {
-    assert!(irqno >= 0 && irqno < 16);
+impl IDTEntry {
+    /// Build a gate for the handler at `offset`, using the given segment
+    /// `selector` and `attributes` byte. The interrupt stack index and the
+    /// reserved field are zero.
+    const fn new(offset: usize, selector: u16, attributes: u8) -> Self {
+        // The handler address is stored in three separate pieces.
+        let low = offset as u16;
+        let mid = (offset >> 16) as u16;
+        let high = (offset >> 32) as u32;
+
+        Self {
+            offset_low: low,
+            selector,
+            interrupt_stack: 0,
+            attributes,
+            offset_mid: mid,
+            offset_high: high,
+            reserved: 0,
+        }
+    }
 
-    let handlers = IRQ_HANDLERS.lock();
+    /// An entry with every field set to zero, used for vectors that have no handler.
+    const fn null() -> Self {
+        // A zero offset, selector and attribute byte leave all seven fields at 0.
+        Self::new(0, 0, 0)
+    }
+}
+
+fn irq_handler(irqno: usize) {
+    assert!(irqno < 16);
 
+    let handlers = IRQ_HANDLERS.lock();
     for handler in handlers[irqno as usize].iter() {
         handler();
     }
+
+    PIC1_COMMAND.write(0x20); // EOI
+    if irqno >= 8 {
+        PIC2_COMMAND.write(0x20); // EOI
+    }
+}
+
+fn fault_handler(int_stack: &mut interrupt_stack) { // Handle CPU exceptions (vectors below 0x20, see `interrupt_handler`).
+    match int_stack.int_no {
+        // Vector 14 is the page fault, 13 general protection, 6 invalid opcode and 8 double fault.
+        14 => handle_page_fault(int_stack),
+        13 if int_stack.ss == 0 => ProcessList::kill_current(Signal::SIGILL), // NOTE(review): presumably `ss == 0` separates kernel from user context — confirm
+        6 | 8 if int_stack.ss == 0 => ProcessList::kill_current(Signal::SIGSEGV),
+        _ => panic!("Unhandled fault: {}", int_stack.int_no), // everything else, including 6/8/13 when the guard fails
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn interrupt_handler(int_stack: *mut interrupt_stack, mmxregs: *mut mmx_registers) { // Common entry point: dispatches on the vector number in `int_stack.int_no`.
+    let int_stack = unsafe { &mut *int_stack }; // both pointers are dereferenced unchecked; the caller must pass valid ones
+    let mmxregs = unsafe { &mut *mmxregs };
+
+    match int_stack.int_no {
+        // Fault
+        0..0x20 => fault_handler(int_stack),
+        // Syscall
+        0x80 => handle_syscall32(int_stack.regs.rax as usize, int_stack, mmxregs),
+        // IRQ
+        no => irq_handler(no as usize - 0x20), // `irq_handler` asserts the resulting number is below 16
+    }
 }
 
 pub fn register_irq_handler<F>(irqno: i32, handler: F) -> Result<(), u32>
@@ -34,3 +122,27 @@ where
     IRQ_HANDLERS.lock_irq()[irqno as usize].push(Box::new(handler));
     Ok(())
 }
+
+pub fn init() -> KResult<()> { // Load the IDT, then program both PICs; always returns `Ok(())`.
+    arch::x86_64::interrupt::lidt(
+        IDT.as_ptr() as usize,
+        (size_of::<IDTEntry>() * 256 - 1) as u16, // table size in bytes minus one
+    );
+
+    // Initialize PIC
+    PIC1_COMMAND.write(0x11); // edge trigger mode
+    PIC1_DATA.write(0x20); // IRQ 0-7 offset
+    PIC1_DATA.write(0x04); // cascade with slave PIC
+    PIC1_DATA.write(0x01); // no buffer mode
+
+    PIC2_COMMAND.write(0x11); // edge trigger mode
+    PIC2_DATA.write(0x28); // IRQ 8-15 offset
+    PIC2_DATA.write(0x02); // cascade with master PIC
+    PIC2_DATA.write(0x01); // no buffer mode
+
+    // Allow all IRQs
+    PIC1_DATA.write(0x0); // mask byte 0: nothing masked on the first PIC
+    PIC2_DATA.write(0x0); // mask byte 0: nothing masked on the second PIC
+
+    Ok(())
+}

+ 2 - 2
src/kernel/mem.rs

@@ -7,6 +7,6 @@ mod page_table;
 mod vrange;
 
 pub(self) use mm_area::MMArea;
-pub use mm_list::{MMList, Mapping, Permission, FileMapping};
-pub(self) use page_table::{PTEIterator, PageTable, PTE};
+pub use mm_list::{handle_page_fault, FileMapping, MMList, Mapping, PageFaultError, Permission};
+pub(self) use page_table::{PageTable, PTE};
 pub use vrange::{VAddr, VRange};

+ 2 - 2
src/kernel/mem/mm_area.rs

@@ -5,8 +5,8 @@ use super::{Mapping, Permission, VAddr, VRange};
 #[derive(Debug)]
 pub struct MMArea {
     range: UnsafeCell<VRange>,
-    mapping: Mapping,
-    permission: Permission,
+    pub(super) mapping: Mapping,
+    pub(super) permission: Permission,
 }
 
 impl Clone for MMArea {

+ 45 - 17
src/kernel/mem/mm_list.rs

@@ -1,3 +1,5 @@
+mod page_fault;
+
 use crate::prelude::*;
 
 use alloc::{collections::btree_set::BTreeSet, sync::Arc};
@@ -7,10 +9,15 @@ use crate::kernel::vfs::dentry::Dentry;
 
 use super::{MMArea, PageTable, VAddr, VRange};
 
+pub use page_fault::{handle_page_fault, PageFaultError};
+
 #[derive(Debug, Clone)]
 pub struct FileMapping {
     file: Arc<Dentry>,
+    /// Offset in the file, aligned to 4KB boundary.
     offset: usize,
+    /// Length of the mapping. Exceeding part will be zeroed.
+    length: usize,
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -37,17 +44,29 @@ struct MMListInner {
 pub struct MMList {
     /// # Safety
     /// This field might be used in IRQ context, so it should be locked with `lock_irq()`.
-    inner: Spin<MMListInner>,
+    inner: Mutex<MMListInner>,
 }
 
 impl FileMapping {
-    pub fn new(file: Arc<Dentry>, offset: usize) -> Self {
+    pub fn new(file: Arc<Dentry>, offset: usize, length: usize) -> Self {
         assert_eq!(offset & 0xfff, 0);
-        Self { file, offset }
+        Self {
+            file,
+            offset,
+            length,
+        }
     }
 
     pub fn offset(&self, offset: usize) -> Self {
-        Self::new(self.file.clone(), self.offset + offset)
+        if self.length <= offset {
+            Self::new(self.file.clone(), self.offset + self.length, 0)
+        } else {
+            Self::new(
+                self.file.clone(),
+                self.offset + offset,
+                self.length - offset,
+            )
+        }
     }
 }
 
@@ -66,7 +85,7 @@ impl MMListInner {
     }
 
     fn check_overlapping_addr(&self, addr: VAddr) -> bool {
-        addr.is_user() && self.overlapping_addr(addr).is_some()
+        addr.is_user() && self.overlapping_addr(addr).is_none()
     }
 
     fn overlapping_range(&self, range: VRange) -> impl DoubleEndedIterator<Item = &MMArea> + '_ {
@@ -74,7 +93,7 @@ impl MMListInner {
     }
 
     fn check_overlapping_range(&self, range: VRange) -> bool {
-        range.is_user() && self.overlapping_range(range).next().is_some()
+        range.is_user() && self.overlapping_range(range).next().is_none()
     }
 
     fn find_available(&self, hint: VAddr, len: usize) -> Option<VAddr> {
@@ -210,7 +229,7 @@ impl MMListInner {
 impl MMList {
     pub fn new() -> Arc<Self> {
         Arc::new(Self {
-            inner: Spin::new(MMListInner {
+            inner: Mutex::new(MMListInner {
                 areas: BTreeSet::new(),
                 page_table: PageTable::new(),
                 break_start: None,
@@ -226,7 +245,7 @@ impl MMList {
         let inner = self.inner.lock_irq();
 
         let list = Arc::new(Self {
-            inner: Spin::new(MMListInner {
+            inner: Mutex::new(MMListInner {
                 areas: inner.areas.clone(),
                 page_table: PageTable::new(),
                 break_start: inner.break_start,
@@ -239,8 +258,8 @@ impl MMList {
             let list_inner = list.inner.lock();
 
             for area in list_inner.areas.iter() {
-                let new_iter = list_inner.page_table.iter_user(area.range());
-                let old_iter = inner.page_table.iter_user(area.range());
+                let new_iter = list_inner.page_table.iter_user(area.range()).unwrap();
+                let old_iter = inner.page_table.iter_user(area.range()).unwrap();
 
                 for (new, old) in new_iter.zip(old_iter) {
                     new.setup_cow(old);
@@ -267,20 +286,18 @@ impl MMList {
         self.inner.lock_irq().unmap(start, len)
     }
 
-    pub fn mmap(
+    pub fn mmap_hint(
         &self,
-        at: VAddr,
+        hint: VAddr,
         len: usize,
         mapping: Mapping,
         permission: Permission,
-        fixed: bool,
     ) -> KResult<VAddr> {
         let mut inner = self.inner.lock_irq();
-        match inner.mmap(at, len, mapping.clone(), permission) {
-            Ok(()) => Ok(at),
-            Err(EEXIST) if fixed => Err(EEXIST),
+        match inner.mmap(hint, len, mapping.clone(), permission) {
+            Ok(()) => Ok(hint),
             Err(EEXIST) => {
-                let at = inner.find_available(at, len).ok_or(ENOMEM)?;
+                let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
                 inner.mmap(at, len, mapping, permission)?;
                 Ok(at)
             }
@@ -288,6 +305,17 @@ impl MMList {
         }
     }
 
+    /// Map `len` bytes at exactly `at`.
+    ///
+    /// Unlike `mmap_hint`, no alternative address is searched for: whatever
+    /// error the underlying `mmap` reports is returned unchanged. On success
+    /// the returned address is `at`.
+    pub fn mmap_fixed(
+        &self,
+        at: VAddr,
+        len: usize,
+        mapping: Mapping,
+        permission: Permission,
+    ) -> KResult<VAddr> {
+        let mut inner = self.inner.lock_irq();
+        // `mapping` is not used again, so hand it over instead of cloning it.
+        inner.mmap(at, len, mapping, permission).map(|_| at)
+    }
+
     pub fn set_break(&self, pos: Option<VAddr>) -> VAddr {
         self.inner.lock_irq().set_break(pos)
     }

+ 202 - 0
src/kernel/mem/mm_list/page_fault.rs

@@ -0,0 +1,202 @@
+use bindings::kernel::mem::paging::pfn_to_page;
+use bindings::{PA_A, PA_ANON, PA_COW, PA_MMAP, PA_P, PA_RW};
+use bitflags::bitflags;
+
+use crate::bindings::root::interrupt_stack;
+use crate::kernel::mem::paging::{Page, PageBuffer};
+use crate::kernel::mem::phys::{CachedPP, PhysPtr};
+use crate::kernel::mem::{Mapping, VRange};
+use crate::kernel::task::{ProcessList, Signal, Thread};
+use crate::prelude::*;
+
+use super::{MMList, VAddr};
+
+bitflags! {
+    /// Decoded bits of the error code pushed by the CPU for a page fault.
+    pub struct PageFaultError: u64 {
+        /// The fault hit a present page (protection violation).
+        const Present = 1 << 0;
+        /// The faulting access was a write.
+        const Write = 1 << 1;
+        /// The access originated from user mode.
+        const User = 1 << 2;
+        /// A reserved bit was set in a paging structure entry.
+        const ReservedSet = 1 << 3;
+        /// The fault was caused by an instruction fetch.
+        const InstructionFetch = 1 << 4;
+        /// The access violated a protection key.
+        const ProtectionKey = 1 << 5;
+        /// The fault is related to SGX access control.
+        const SGX = 1 << 15;
+    }
+}
+
+/// One record of the page-fault fix table delimited by `FIX_START`/`FIX_END`.
+///
+/// `repr(C)` keeps the field order and layout fixed, since the records are
+/// read straight from memory via a raw pointer.
+#[repr(C)]
+struct FixEntry {
+    /// First instruction address covered by this entry.
+    start: u64,
+    /// Size in bytes of the covered instruction range.
+    length: u64,
+    /// Address execution is redirected to when a fault occurs inside the range.
+    jump_address: u64,
+    /// Kind of the guarded operation; currently not consulted by the handler.
+    op_type: u64,
+}
+
+impl MMList {
+    /// Resolve a page fault at `addr` against this address space.
+    ///
+    /// Handles copy-on-write pages and lazily loaded file mappings. Faults
+    /// that cannot be attributed to a mapped area are either reported as a
+    /// signal (user mode) or forwarded to the fix table (kernel mode).
+    ///
+    /// # Return
+    /// `Ok(())` if the fault was resolved (or redirected through the fix
+    /// table), `Err(signal)` if the current process should receive `signal`.
+    /// The signal is *returned* rather than delivered here so that the
+    /// `inner` lock guard is released before the caller kills the process.
+    fn handle_page_fault(
+        &self,
+        int_stack: &mut interrupt_stack,
+        addr: VAddr,
+        error: PageFaultError,
+    ) -> Result<(), Signal> {
+        let inner = self.inner.lock();
+        let area = match inner.areas.get(&VRange::from(addr)) {
+            Some(area) => area,
+            None => {
+                if error.contains(PageFaultError::User) {
+                    // User access to an address that is not mapped at all.
+                    return Err(Signal::SIGSEGV);
+                } else {
+                    try_page_fault_fix(int_stack, addr);
+                    return Ok(());
+                }
+            }
+        };
+
+        // User access to a present page caused the fault: check the area permissions.
+        if error.contains(PageFaultError::User | PageFaultError::Present) {
+            // Write to a read-only area.
+            if error.contains(PageFaultError::Write) && !area.permission.write {
+                return Err(Signal::SIGSEGV);
+            }
+
+            // Instruction fetch from a non-executable area.
+            if error.contains(PageFaultError::InstructionFetch) && !area.permission.execute {
+                return Err(Signal::SIGSEGV);
+            }
+        }
+
+        let pte = inner
+            .page_table
+            .iter_user(VRange::new(addr.floor(), addr.floor() + 0x1000))
+            .unwrap()
+            .next()
+            .expect("If we can find the mapped area, we should be able to find the PTE");
+
+        let is_mapped = matches!(&area.mapping, Mapping::File(_));
+        if !is_mapped && !error.contains(PageFaultError::Present) {
+            try_page_fault_fix(int_stack, addr);
+            return Ok(());
+        }
+
+        let mut pfn = pte.pfn();
+        let mut attributes = pte.attributes();
+
+        // NOTE: the flag constants are widened to `usize` *before* being
+        // inverted. `!FLAG as usize` parses as `(!FLAG) as usize`, which would
+        // clear the upper attribute bits if the constant is narrower than `usize`.
+        if attributes & PA_COW as usize != 0 {
+            attributes &= !(PA_COW as usize);
+            if area.permission.write {
+                attributes |= PA_RW as usize;
+            } else {
+                attributes &= !(PA_RW as usize);
+            }
+
+            // TODO!!!: Change this.
+            let page = unsafe { pfn_to_page(pfn).as_mut().unwrap() };
+            if page.refcount == 1 {
+                // We are the only user of the page: just take it over.
+                pte.set_attributes(attributes);
+                return Ok(());
+            }
+
+            // The page is shared: give this address space a private copy.
+            let new_page = Page::alloc_one();
+            if attributes & PA_ANON as usize != 0 {
+                new_page.zero();
+            } else {
+                new_page
+                    .as_cached()
+                    .as_mut_slice::<u8>(0x1000)
+                    .copy_from_slice(CachedPP::new(pfn).as_slice(0x1000));
+            }
+
+            attributes &= !((PA_A | PA_ANON) as usize);
+            page.refcount -= 1;
+
+            pfn = new_page.into_pfn();
+            pte.set(pfn, attributes);
+        }
+
+        // TODO: shared mapping
+        if attributes & PA_MMAP as usize != 0 {
+            attributes |= PA_P as usize;
+
+            if let Mapping::File(mapping) = &area.mapping {
+                let load_offset = addr.floor() - area.range().start();
+                if load_offset < mapping.length {
+                    // SAFETY: Since we are here, the `pfn` must refer to a valid buddy page.
+                    let page = unsafe { Page::from_pfn(pfn, 0) };
+                    let nread = mapping
+                        .file
+                        .read(
+                            &mut PageBuffer::new(page.clone()),
+                            mapping.offset + load_offset,
+                        )
+                        .map_err(|_| Signal::SIGBUS)?;
+
+                    // Zero whatever the read did not fill.
+                    if nread < page.len() {
+                        page.as_cached().as_mut_slice::<u8>(0x1000)[nread..].fill(0);
+                    }
+
+                    // Zero the part of the page that lies beyond the mapping length.
+                    if mapping.length - load_offset < 0x1000 {
+                        let length_to_end = mapping.length - load_offset;
+                        page.as_cached().as_mut_slice::<u8>(0x1000)[length_to_end..].fill(0);
+                    }
+                }
+                // Otherwise, the page is kept zero emptied.
+
+                attributes &= !(PA_MMAP as usize);
+                pte.set_attributes(attributes);
+            } else {
+                panic!("Anonymous mapping should not be PA_MMAP");
+            }
+        }
+
+        Ok(())
+    }
+}
+
+// Bounds of the page-fault fix table consumed by `try_page_fault_fix`.
+//
+// NOTE(review): these are declared as pointer-typed statics, so the *value*
+// stored at each symbol is used as the table address. Confirm that the
+// linker script stores the addresses there rather than using the symbols
+// themselves as the table boundaries.
+extern "C" {
+    static FIX_START: *const FixEntry;
+    static FIX_END: *const FixEntry;
+}
+
+/// Recover from a kernel-mode page fault by redirecting execution.
+///
+/// Looks up the faulting instruction pointer in the fix table and, on a hit,
+/// rewrites the saved `v_rip` to the entry's `jump_address`.
+///
+/// Panics (via `kernel_page_fault_die`) if no entry covers the faulting `ip`.
+fn try_page_fault_fix(int_stack: &mut interrupt_stack, addr: VAddr) {
+    // TODO: Use `op_type` to fix.
+    let ip = int_stack.v_rip as u64;
+
+    // SAFETY: `FIX_START` and `FIX_END` are defined in the linker script in `.rodata` section.
+    let table = unsafe {
+        let count = (FIX_END as usize - FIX_START as usize) / size_of::<FixEntry>();
+        core::slice::from_raw_parts(FIX_START, count)
+    };
+
+    let hit = table
+        .iter()
+        .find(|fix| ip >= fix.start && ip < fix.start + fix.length);
+
+    match hit {
+        Some(fix) => int_stack.v_rip = fix.jump_address as usize,
+        None => kernel_page_fault_die(addr, ip as usize),
+    }
+}
+
+/// Abort with a panic describing an unrecoverable kernel-mode page fault.
+///
+/// `vaddr` is the faulting address and `ip` the address of the instruction
+/// that performed the access. Never returns.
+fn kernel_page_fault_die(vaddr: VAddr, ip: usize) -> ! {
+    let VAddr(fault_addr) = vaddr;
+    panic!(
+        "Invalid kernel mode memory access to {:#8x} while executing the instruction at {:#8x}",
+        fault_addr, ip
+    )
+}
+
+/// Entry point of the page fault handler.
+///
+/// Reads the faulting address from CR2, decodes the error code saved on the
+/// interrupt stack and lets the current process' memory list resolve the
+/// fault. If that yields a signal, the current process is killed with it.
+pub fn handle_page_fault(int_stack: &mut interrupt_stack) {
+    let vaddr = VAddr(arch::x86_64::vm::get_cr2());
+    let error = PageFaultError::from_bits_truncate(int_stack.error_code);
+
+    // Keep this a separate statement so every temporary of the call chain is
+    // gone before we possibly kill the current process below.
+    let result = Thread::current()
+        .process
+        .mm_list
+        .handle_page_fault(int_stack, vaddr, error);
+
+    match result {
+        Ok(()) => {}
+        Err(signal) => {
+            println_debug!("Page fault in user space at {:#x}", vaddr.0);
+            ProcessList::kill_current(signal)
+        }
+    }
+}

+ 53 - 39
src/kernel/mem/page_table.rs

@@ -1,3 +1,5 @@
+use lazy_static::lazy_static;
+
 use crate::prelude::*;
 
 use crate::bindings::root::{EINVAL, KERNEL_PML4};
@@ -9,8 +11,6 @@ use super::{
 };
 use super::{MMArea, Permission};
 
-const EMPTY_PAGE_PFN: usize = 0x8000;
-
 const PA_P: usize = 0x001;
 const PA_RW: usize = 0x002;
 const PA_US: usize = 0x004;
@@ -35,22 +35,30 @@ pub struct PageTable {
     page: Page,
 }
 
-pub struct PTEIterator<'lt, const Kernel: bool> {
+pub struct PTEIterator<'lt, const KERNEL: bool> {
     count: usize,
     i4: u16,
     i3: u16,
     i2: u16,
     i1: u16,
-    p4: Page,
-    p3: Page,
-    p2: Page,
-    p1: Page,
+    p4: CachedPP,
+    p3: CachedPP,
+    p2: CachedPP,
+    p1: CachedPP,
 
     start: VAddr,
     end: VAddr,
     _phantom: core::marker::PhantomData<&'lt ()>,
 }
 
+lazy_static! {
+    // A zero-filled page allocated on first use. Freshly set up user PTEs
+    // point at a clone of this page (see the `pte.set(EMPTY_PAGE...)` calls
+    // in `PageTable`), replacing the former hard-coded `EMPTY_PAGE_PFN`.
+    static ref EMPTY_PAGE: Page = {
+        let page = Page::alloc_one();
+        page.zero();
+        page
+    };
+}
+
 impl PTE {
     pub fn is_user(&self) -> bool {
         self.0 & PA_US != 0
@@ -61,11 +69,11 @@ impl PTE {
     }
 
     pub fn pfn(&self) -> usize {
-        self.0 & !0xfff
+        self.0 & !PA_MASK
     }
 
     pub fn attributes(&self) -> usize {
-        self.0 & 0xfff
+        self.0 & PA_MASK
     }
 
     pub fn set(&mut self, pfn: usize, attributes: usize) {
@@ -80,7 +88,7 @@ impl PTE {
         self.set(self.pfn(), attributes)
     }
 
-    pub fn parse_page_table(&mut self, kernel: bool) -> Page {
+    fn parse_page_table(&mut self, kernel: bool) -> CachedPP {
         let attributes = if kernel {
             PA_P | PA_RW | PA_G
         } else {
@@ -88,19 +96,20 @@ impl PTE {
         };
 
         if self.is_present() {
-            Page::get(self.pfn(), 0)
+            CachedPP::new(self.pfn())
         } else {
             let page = Page::alloc_one();
+            let pp = page.as_cached();
             page.zero();
-            self.set(page.as_phys(), attributes);
 
-            page
+            self.set(page.into_pfn(), attributes);
+            pp
         }
     }
 
     pub fn setup_cow(&mut self, from: &mut Self) {
         self.set(
-            Page::get(from.pfn(), 0).into_pfn(),
+            unsafe { Page::from_pfn(from.pfn(), 0) }.into_pfn(),
             (from.attributes() & !(PA_RW | PA_A | PA_D)) | PA_COW,
         );
 
@@ -115,21 +124,22 @@ impl PTE {
     pub fn take(&mut self) -> Page {
         // SAFETY: Acquire the ownership of the page from the page table and then
         // clear the PTE so no one could be able to access the page from here later on.
-        let page = unsafe { Page::from_pfn(self.pfn(), 0) };
+        let page = unsafe { Page::take_pfn(self.pfn(), 0) };
         self.clear();
         page
     }
 }
 
-impl<const Kernel: bool> PTEIterator<'_, Kernel> {
-    fn new(pt: Page, start: VAddr, end: VAddr) -> KResult<Self> {
-        if start >= end {
+impl<'lt, const KERNEL: bool> PTEIterator<'lt, KERNEL> {
+    fn new(pt: &'lt Page, start: VAddr, end: VAddr) -> KResult<Self> {
+        if start > end {
             return Err(EINVAL);
         }
 
-        let p3 = pt.as_page_table()[Self::index(4, start)].parse_page_table(Kernel);
-        let p2 = pt.as_page_table()[Self::index(3, start)].parse_page_table(Kernel);
-        let p1 = pt.as_page_table()[Self::index(2, start)].parse_page_table(Kernel);
+        let p4 = pt.as_cached();
+        let p3 = p4.as_mut_slice::<PTE>(512)[Self::index(4, start)].parse_page_table(KERNEL);
+        let p2 = p3.as_mut_slice::<PTE>(512)[Self::index(3, start)].parse_page_table(KERNEL);
+        let p1 = p2.as_mut_slice::<PTE>(512)[Self::index(2, start)].parse_page_table(KERNEL);
 
         Ok(Self {
             count: (end.0 - start.0) >> 12,
@@ -137,7 +147,7 @@ impl<const Kernel: bool> PTEIterator<'_, Kernel> {
             i3: Self::index(3, start) as u16,
             i2: Self::index(2, start) as u16,
             i1: Self::index(1, start) as u16,
-            p4: pt.clone(),
+            p4,
             p3,
             p2,
             p1,
@@ -156,15 +166,17 @@ impl<const Kernel: bool> PTEIterator<'_, Kernel> {
     }
 }
 
-impl<'lt, const Kernel: bool> Iterator for PTEIterator<'lt, Kernel> {
+impl<'lt, const KERNEL: bool> Iterator for PTEIterator<'lt, KERNEL> {
     type Item = &'lt mut PTE;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.count == 0 {
+        if self.count != 0 {
+            self.count -= 1;
+        } else {
             return None;
         }
 
-        let retval = &mut self.p1.as_page_table()[self.i1 as usize];
+        let retval = &mut self.p1.as_mut_slice::<PTE>(512)[self.i1 as usize];
         self.i1 = (self.i1 + 1) % 512;
         if self.i1 == 0 {
             self.i2 = (self.i2 + 1) % 512;
@@ -176,11 +188,12 @@ impl<'lt, const Kernel: bool> Iterator for PTEIterator<'lt, Kernel> {
                         panic!("PTEIterator: out of range");
                     }
                 }
-                self.p3 = self.p4.as_page_table()[self.i4 as usize].parse_page_table(Kernel);
+                self.p3 =
+                    self.p4.as_mut_slice::<PTE>(512)[self.i4 as usize].parse_page_table(KERNEL);
             }
-            self.p2 = self.p3.as_page_table()[self.i3 as usize].parse_page_table(Kernel);
+            self.p2 = self.p3.as_mut_slice::<PTE>(512)[self.i3 as usize].parse_page_table(KERNEL);
         }
-        self.p1 = self.p2.as_page_table()[self.i2 as usize].parse_page_table(Kernel);
+        self.p1 = self.p2.as_mut_slice::<PTE>(512)[self.i2 as usize].parse_page_table(KERNEL);
         Some(retval)
     }
 }
@@ -193,19 +206,19 @@ impl PageTable {
         let kernel_space_page_table = CachedPP::new(KERNEL_PML4 as usize);
         unsafe {
             page.as_cached()
-                .as_ptr::<()>()
+                .as_ptr::<u8>()
                 .copy_from_nonoverlapping(kernel_space_page_table.as_ptr(), page.len())
         };
 
         Self { page }
     }
 
-    pub fn iter_user(&self, range: VRange) -> PTEIterator<'_, false> {
-        PTEIterator::new(self.page.clone(), range.start().floor(), range.end().ceil()).unwrap()
+    pub fn iter_user(&self, range: VRange) -> KResult<PTEIterator<'_, false>> {
+        PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
-    pub fn iter_kernel(&self, range: VRange) -> PTEIterator<'_, true> {
-        PTEIterator::new(self.page.clone(), range.start().floor(), range.end().ceil()).unwrap()
+    pub fn iter_kernel(&self, range: VRange) -> KResult<PTEIterator<'_, true>> {
+        PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
     pub fn switch(&self) {
@@ -215,7 +228,7 @@ impl PageTable {
     pub fn unmap(&self, area: &MMArea) {
         let range = area.range();
         let use_invlpg = range.len() / 4096 < 4;
-        let iter = self.iter_user(range);
+        let iter = self.iter_user(range).unwrap();
 
         if self.page.as_phys() != arch::vm::current_page_table() {
             for pte in iter {
@@ -248,8 +261,8 @@ impl PageTable {
             PA_US | PA_COW | PA_ANON | PA_MMAP | PA_NXE
         };
 
-        for pte in self.iter_user(range) {
-            pte.set(EMPTY_PAGE_PFN, attributes);
+        for pte in self.iter_user(range).unwrap() {
+            pte.set(EMPTY_PAGE.clone().into_pfn(), attributes);
         }
     }
 
@@ -262,15 +275,16 @@ impl PageTable {
             PA_P | PA_US | PA_COW | PA_ANON | PA_NXE
         };
 
-        for pte in self.iter_user(range) {
-            pte.set(EMPTY_PAGE_PFN, attributes);
+        for pte in self.iter_user(range).unwrap() {
+            pte.set(EMPTY_PAGE.clone().into_pfn(), attributes);
         }
     }
 }
 
 fn drop_page_table_recursive(pt: &Page, level: usize) {
     for pte in pt
-        .as_page_table()
+        .as_cached()
+        .as_mut_slice::<PTE>(512)
         .iter_mut()
         .filter(|pte| pte.is_present() && pte.is_user())
     {

+ 0 - 146
src/kernel/mem/paging.cc

@@ -5,10 +5,8 @@
 
 #include <kernel/async/lock.hpp>
 #include <kernel/log.hpp>
-#include <kernel/mem/mm_list.hpp>
 #include <kernel/mem/paging.hpp>
 #include <kernel/mem/slab.hpp>
-#include <kernel/mem/vm_area.hpp>
 #include <kernel/process.hpp>
 
 using namespace types::list;
@@ -16,11 +14,6 @@ using namespace types::list;
 using namespace kernel::async;
 using namespace kernel::mem::paging;
 
-static inline void __page_fault_die(uintptr_t vaddr) {
-    kmsgf("[kernel] kernel panic: invalid memory access to %p", vaddr);
-    freeze();
-}
-
 static struct zone_info {
     page* next;
     std::size_t count;
@@ -261,142 +254,3 @@ page* kernel::mem::paging::pfn_to_page(pfn_t pfn) {
 void kernel::mem::paging::increase_refcount(page* pg) {
     pg->refcount++;
 }
-
-struct fix_entry {
-    uint64_t start;
-    uint64_t length;
-    uint64_t jump_address;
-    uint64_t type;
-};
-
-extern "C" fix_entry FIX_START[], FIX_END[];
-bool page_fault_fix(interrupt_stack* int_stack) {
-    // TODO: type load
-
-    // type store
-    for (fix_entry* fix = FIX_START; fix < FIX_END; fix++) {
-        if (int_stack->v_rip >= fix->start && int_stack->v_rip < fix->start + fix->length) {
-            int_stack->v_rip = fix->jump_address;
-            return true;
-        }
-    }
-
-    return false;
-}
-
-void kernel::mem::paging::handle_page_fault(interrupt_stack* int_stack) {
-    using namespace kernel::mem;
-    using namespace paging;
-
-    auto err = int_stack->error_code;
-
-    uintptr_t vaddr;
-    asm volatile("mov %%cr2, %0" : "=g"(vaddr) : :);
-    auto& mms = current_process->mms;
-
-    auto* mm_area = mms.find(vaddr);
-    if (!mm_area) [[unlikely]] {
-        // user access to address that does not exist
-        if (err & PAGE_FAULT_U)
-            kill_current(SIGSEGV);
-
-        if (!page_fault_fix(int_stack)) {
-            __page_fault_die(vaddr);
-        } else {
-            return;
-        }
-    }
-
-    // user access to a present page caused the fault
-    // check access rights
-    if (err & PAGE_FAULT_U && err & PAGE_FAULT_P) {
-        // write to read only pages
-        if (err & PAGE_FAULT_W && !(mm_area->flags & MM_WRITE))
-            kill_current(SIGSEGV);
-
-        // execute from non-executable pages
-        if (err & PAGE_FAULT_I && !(mm_area->flags & MM_EXECUTE))
-            kill_current(SIGSEGV);
-    }
-
-    auto idx = idx_all(vaddr);
-
-    auto pe = mms.get_page_table()[std::get<1>(idx)];
-    assert(pe.attributes() & PA_P);
-    pe = pe.parse()[std::get<2>(idx)];
-    assert(pe.attributes() & PA_P);
-    pe = pe.parse()[std::get<3>(idx)];
-    assert(pe.attributes() & PA_P);
-    pe = pe.parse()[std::get<4>(idx)];
-
-    bool mmapped = mm_area->flags & MM_MAPPED;
-    assert(!mmapped || mm_area->mapped_file);
-
-    if (!(err & PAGE_FAULT_P) && !mmapped) [[unlikely]] {
-        if (!page_fault_fix(int_stack)) {
-            __page_fault_die(vaddr);
-        } else {
-            return;
-        }
-    }
-
-    pfn_t pfn = pe.pfn();
-    auto attr = pe.attributes();
-
-    page* pg = pfn_to_page(pfn);
-
-    if (attr & PA_COW) {
-        attr &= ~PA_COW;
-        if (mm_area->flags & MM_WRITE)
-            attr |= PA_RW;
-        else
-            attr &= ~PA_RW;
-
-        // if it is a dying page
-        // TODO: use atomic
-        if (pg->refcount == 1) {
-            pe.set(attr, pfn);
-            return;
-        }
-
-        // duplicate the page
-        page* new_page = alloc_page();
-        pfn_t new_pfn = page_to_pfn(new_page);
-        physaddr<void> new_page_addr{new_pfn};
-
-        if (attr & PA_ANON)
-            memset(new_page_addr, 0x00, 0x1000);
-        else
-            memcpy(new_page_addr, physaddr<void>{pfn}, 0x1000);
-
-        attr &= ~(PA_A | PA_ANON);
-        --pg->refcount;
-
-        pe.set(attr, new_pfn);
-        pfn = new_pfn;
-    }
-
-    if (attr & PA_MMAP) {
-        attr |= PA_P;
-
-        size_t offset = (vaddr & ~0xfff) - mm_area->start;
-        char* data = physaddr<char>{pfn};
-
-        int n = fs::fs_read(mm_area->mapped_file.get(), data, 4096, mm_area->file_offset + offset,
-                            4096);
-
-        if (n < 0) {
-            kill_current(SIGBUS);
-            return;
-        }
-
-        // TODO: send SIGBUS if offset is greater than real size
-        if (n != 4096)
-            memset(data + n, 0x00, 4096 - n);
-
-        // TODO: shared mapping
-        attr &= ~PA_MMAP;
-
-        pe.set(attr, pfn);
-    }
-}

+ 36 - 12
src/kernel/mem/paging.rs

@@ -4,11 +4,11 @@ use crate::bindings::root::kernel::mem::paging::{
     pfn_to_page as c_pfn_to_page, PAGE_BUDDY,
 };
 use crate::bindings::root::EFAULT;
+use crate::io::{Buffer, FillResult};
 use crate::kernel::mem::phys;
 use core::fmt;
 
 use super::phys::PhysPtr;
-use super::PTE;
 
 pub struct Page {
     page_ptr: *mut c_page,
@@ -28,12 +28,11 @@ impl Page {
         Self { page_ptr, order }
     }
 
-    /// Get `Page` from `pfn` without increasing the reference count.
+    /// Get `Page` from `pfn`, acquiring the ownership of the page. `refcount` is not increased.
     ///
     /// # Safety
-    ///
-    /// Caller must ensure that the `pfn` is no longer used or there will be a memory leak.
-    pub unsafe fn from_pfn(pfn: usize, order: u32) -> Self {
+    /// Caller must ensure that the pfn is no longer referenced by any other code.
+    pub unsafe fn take_pfn(pfn: usize, order: u32) -> Self {
         let page_ptr = unsafe { c_pfn_to_page(pfn) };
 
         // Only buddy pages can be used here.
@@ -49,12 +48,15 @@ impl Page {
     }
 
     /// Get `Page` from `pfn` and increase the reference count.
-    pub fn get(pfn: usize, order: u32) -> Self {
+    ///
+    /// # Safety
+    /// Caller must ensure that `pfn` refers to a valid physical frame number with `refcount` > 0.
+    pub unsafe fn from_pfn(pfn: usize, order: u32) -> Self {
         // SAFETY: `pfn` is a valid physical frame number with refcount > 0.
         unsafe { Self::increase_refcount(pfn) };
 
-        // SAFETY: `pfn` has increased refcount.
-        unsafe { Self::from_pfn(pfn, order) }
+        // SAFETY: `pfn` has an increased refcount.
+        unsafe { Self::take_pfn(pfn, order) }
     }
 
     /// Consumes the `Page` and returns the physical frame number without dropping the reference
@@ -89,10 +91,6 @@ impl Page {
         }
     }
 
-    pub fn as_page_table<'lt>(&self) -> &'lt mut [PTE; 512] {
-        self.as_cached().as_mut_slice(512).try_into().unwrap()
-    }
-
     /// # Safety
     /// Caller must ensure that the page is properly freed.
     pub unsafe fn increase_refcount(pfn: usize) {
@@ -198,6 +196,32 @@ impl core::fmt::Write for PageBuffer {
     }
 }
 
+/// Lets a `PageBuffer` be used as the destination of buffered reads.
+impl Buffer for PageBuffer {
+    /// Capacity of the buffer: the length of the backing page.
+    fn total(&self) -> usize {
+        self.page.len()
+    }
+
+    /// Number of bytes written into the buffer so far.
+    fn wrote(&self) -> usize {
+        self.len()
+    }
+
+    /// Append as much of `data` as still fits.
+    ///
+    /// Returns `Full` if there was no room at all, `Partial(n)` if only the
+    /// first `n` bytes fit, and `Done(n)` if all `n` bytes were stored.
+    fn fill(&mut self, data: &[u8]) -> crate::KResult<crate::io::FillResult> {
+        let room = self.remaining();
+        if room == 0 {
+            return Ok(FillResult::Full);
+        }
+
+        let copied = data.len().min(room);
+        let (head, _) = data.split_at(copied);
+        self.available_as_slice()[..copied].copy_from_slice(head);
+        self.consume(copied);
+
+        let outcome = if copied < data.len() {
+            FillResult::Partial(copied)
+        } else {
+            FillResult::Done(copied)
+        };
+        Ok(outcome)
+    }
+}
+
 /// Copy data from a slice to a `Page`
 ///
 /// DONT USE THIS FUNCTION TO COPY DATA TO MMIO ADDRESSES

+ 1 - 90
src/kernel/process.cpp

@@ -1,95 +1,6 @@
-#include <assert.h>
-#include <bits/alltypes.h>
-#include <stdint.h>
-#include <sys/mount.h>
-#include <sys/wait.h>
-
-#include <types/allocator.hpp>
-#include <types/cplusplus.hpp>
-#include <types/elf.hpp>
 #include <types/types.h>
 
-#include <kernel/async/lock.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-extern "C" void late_init_rust(uintptr_t* out_sp, uintptr_t* out_ip);
-
-void NORETURN _kernel_init(kernel::mem::paging::pfn_t kernel_stack_pfn) {
-    kernel::mem::paging::free_pages(kernel_stack_pfn, 9);
-
-    uintptr_t sp, ip;
-    late_init_rust(&sp, &ip);
-
-    asm volatile("sti");
-
-    // ------------------------------------------
-    // interrupt enabled
-    // ------------------------------------------
-
-    int ds = 0x33, cs = 0x2b;
-
-    asm volatile(
-        "mov %0, %%rax\n"
-        "mov %%ax, %%ds\n"
-        "mov %%ax, %%es\n"
-        "mov %%ax, %%fs\n"
-        "mov %%ax, %%gs\n"
-
-        "push %%rax\n"
-        "push %2\n"
-        "push $0x200\n"
-        "push %1\n"
-        "push %3\n"
-
-        "iretq\n"
-        :
-        : "g"(ds), "g"(cs), "g"(sp), "g"(ip)
-        : "eax", "memory");
-
-    freeze();
-}
-
-void NORETURN init_scheduler(kernel::mem::paging::pfn_t kernel_stack_pfn) {
-    procs = new proclist;
-
-    asm volatile(
-        "mov %2, %%rdi\n"
-        "mov %0, %%rsp\n"
-        "sub $24, %%rsp\n"
-        "mov %=f, %%rbx\n"
-        "mov %%rbx, (%%rsp)\n"   // return address
-        "mov %%rbx, 16(%%rsp)\n" // previous frame return address
-        "xor %%rbx, %%rbx\n"
-        "mov %%rbx, 8(%%rsp)\n" // previous frame rbp
-        "mov %%rsp, %%rbp\n"    // current frame rbp
-
-        "push %1\n"
-
-        "mov $0x10, %%ax\n"
-        "mov %%ax, %%ss\n"
-        "mov %%ax, %%ds\n"
-        "mov %%ax, %%es\n"
-        "mov %%ax, %%fs\n"
-        "mov %%ax, %%gs\n"
-
-        "push $0x0\n"
-        "popf\n"
-
-        "ret\n"
-
-        "%=:\n"
-        "ud2"
-        :
-        : "a"(current_thread->kstack.sp), "c"(_kernel_init), "g"(kernel_stack_pfn)
-        : "memory");
-
-    freeze();
-}
-
+// TODO: remove this
 void NORETURN freeze(void) {
     for (;;)
         asm volatile("cli\n\thlt");

+ 2 - 11
src/kernel/syscall.rs

@@ -179,22 +179,13 @@ const SYSCALL_HANDLERS_SIZE: usize = 404;
 static mut SYSCALL_HANDLERS: [Option<SyscallHandler>; SYSCALL_HANDLERS_SIZE] =
     [const { None }; SYSCALL_HANDLERS_SIZE];
 
-#[no_mangle]
-pub extern "C" fn handle_syscall32(
-    no: usize,
-    int_stack: *mut interrupt_stack,
-    mmxregs: *mut mmx_registers,
-) {
+pub fn handle_syscall32(no: usize, int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) {
     // SAFETY: `SYSCALL_HANDLERS` are never modified after initialization.
     let syscall = unsafe { SYSCALL_HANDLERS.get(no) }.and_then(Option::as_ref);
 
-    // SAFETY: `int_stack` and `mmx_registers` are always valid.
-    let int_stack = unsafe { int_stack.as_mut().unwrap() };
-    let mmxregs = unsafe { mmxregs.as_mut().unwrap() };
-
     match syscall {
         None => {
-            println_warn!("Syscall {} isn't implemented", no);
+            println_warn!("Syscall {no}({no:#x}) isn't implemented");
             ProcessList::kill_current(Signal::SIGSYS);
         }
         Some(handler) => {

+ 15 - 4
src/kernel/syscall/mm.rs

@@ -52,8 +52,8 @@ fn do_mmap_pgoff(
 
     // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether
     //          `addr` is above user reachable memory.
-    mm_list
-        .mmap(
+    let addr = if flags.contains(UserMmapFlags::MAP_FIXED) {
+        mm_list.mmap_fixed(
             addr,
             len,
             Mapping::Anonymous,
@@ -61,9 +61,20 @@ fn do_mmap_pgoff(
                 write: prot.contains(UserMmapProtocol::PROT_WRITE),
                 execute: prot.contains(UserMmapProtocol::PROT_EXEC),
             },
-            flags.contains(UserMmapFlags::MAP_FIXED),
         )
-        .map(|addr| addr.0)
+    } else {
+        mm_list.mmap_hint(
+            addr,
+            len,
+            Mapping::Anonymous,
+            Permission {
+                write: prot.contains(UserMmapProtocol::PROT_WRITE),
+                execute: prot.contains(UserMmapProtocol::PROT_EXEC),
+            },
+        )
+    };
+
+    addr.map(|addr| addr.0)
 }
 
 fn do_munmap(addr: usize, len: usize) -> KResult<usize> {

+ 23 - 30
src/kernel/syscall/procops.rs

@@ -1,14 +1,14 @@
 use alloc::borrow::ToOwned;
 use alloc::ffi::CString;
-use alloc::sync::Arc;
-use bindings::types::elf::{elf32_load, elf32_load_data, ELF_LOAD_FAIL_NORETURN};
 use bindings::{interrupt_stack, mmx_registers, EINVAL, ENOENT, ENOTDIR, ESRCH};
 use bitflags::bitflags;
 
+use crate::elf::ParsedElf32;
 use crate::io::Buffer;
 use crate::kernel::constants::{PR_GET_NAME, PR_SET_NAME, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK};
+use crate::kernel::mem::VAddr;
 use crate::kernel::task::{
-    Process, ProcessList, Scheduler, Signal, SignalAction, Thread, UserDescriptor, WaitObject,
+    ProcessList, Scheduler, Signal, SignalAction, Thread, UserDescriptor, WaitObject,
 };
 use crate::kernel::user::dataflow::UserString;
 use crate::kernel::user::{UserPointer, UserPointerMut};
@@ -77,35 +77,28 @@ fn do_mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usiz
 }
 
 /// # Return
-/// `(ip, sp)`
-fn do_execve(exec: &[u8], argv: &[CString], envp: &[CString]) -> KResult<(usize, usize)> {
-    let context = FsContext::get_current();
-    let dentry = Dentry::open(&context, Path::new(exec)?, true)?;
+/// `(entry_ip, sp)`
+fn do_execve(exec: &[u8], argv: Vec<CString>, envp: Vec<CString>) -> KResult<(VAddr, VAddr)> {
+    let dentry = Dentry::open(&FsContext::get_current(), Path::new(exec)?, true)?;
     if !dentry.is_valid() {
         return Err(ENOENT);
     }
 
-    let argv_array = argv.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-    let envp_array = envp.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-
-    let mut load_data = elf32_load_data {
-        exec_dent: Arc::into_raw(dentry) as *mut _,
-        argv: argv_array.as_ptr(),
-        argv_count: argv_array.len(),
-        envp: envp_array.as_ptr(),
-        envp_count: envp_array.len(),
-        ip: 0,
-        sp: 0,
-    };
+    // TODO: When `execve` is called by one of the threads in a process, the other threads
+    //       should be terminated and `execve` is performed in the thread group leader.
+    let elf = ParsedElf32::parse(dentry.clone())?;
+    let result = elf.load(&Thread::current().process.mm_list, argv, envp);
+    if let Ok((ip, sp)) = result {
+        Thread::current().files.on_exec();
+        Thread::current().signal_list.clear_non_ignore();
+        Thread::current().set_name(dentry.name().clone());
 
-    Thread::current().files.on_exec();
-    match unsafe { elf32_load(&mut load_data) } {
-        ELF_LOAD_FAIL_NORETURN => ProcessList::kill_current(Signal::SIGSEGV),
-        0 => {
-            Thread::current().signal_list.clear_non_ignore();
-            Ok((load_data.ip, load_data.sp))
-        }
-        n => Err(-n as u32),
+        Ok((ip, sp))
+    } else {
+        drop(dentry);
+
+        // We can't hold any ownership when we call `kill_current`.
+        ProcessList::kill_current(Signal::SIGSEGV);
     }
 }
 
@@ -143,10 +136,10 @@ fn sys_execve(int_stack: &mut interrupt_stack, _mmxregs: &mut mmx_registers) ->
             envp = envp.offset(1)?;
         }
 
-        let (ip, sp) = do_execve(exec.as_cstr().to_bytes(), &argv_vec, &envp_vec)?;
+        let (ip, sp) = do_execve(exec.as_cstr().to_bytes(), argv_vec, envp_vec)?;
 
-        int_stack.v_rip = ip;
-        int_stack.rsp = sp;
+        int_stack.v_rip = ip.0;
+        int_stack.rsp = sp.0;
         Ok(())
     })() {
         Ok(_) => 0,

+ 2 - 2
src/kernel/task.rs

@@ -8,6 +8,6 @@ pub(self) use kstack::KernelStack;
 pub use scheduler::Scheduler;
 pub use signal::{Signal, SignalAction};
 pub use thread::{
-    Process, ProcessGroup, ProcessList, Session, Thread, ThreadState, UserDescriptor,
-    UserDescriptorFlags, WaitObject,
+    init_multitasking, Process, ProcessGroup, ProcessList, Session, Thread, ThreadState,
+    UserDescriptor, UserDescriptorFlags, WaitObject,
 };

+ 1 - 1
src/kernel/task/kstack.rs

@@ -68,7 +68,7 @@ impl<'lt> KernelStackWriter<'lt> {
 
         // SAFETY: `sp` is always valid.
         unsafe {
-            (self.sp as *mut usize).write(val);
+            (*self.sp as *mut usize).write(val);
         }
     }
 

+ 15 - 1
src/kernel/task/scheduler.rs

@@ -6,6 +6,7 @@ use alloc::{
     collections::vec_deque::VecDeque,
     sync::{Arc, Weak},
 };
+use lazy_static::lazy_static;
 
 use super::{Thread, ThreadState};
 
@@ -29,6 +30,12 @@ static mut IDLE_TASK: Option<Arc<Thread>> = None;
 /// TODO!!!: This should be per cpu in smp environment.
 static mut CURRENT: Option<Arc<Thread>> = None;
 
+lazy_static! {
+    // The single scheduler instance handed out by `Scheduler::get()`;
+    // it starts with an empty ready queue and is guarded by a spin lock.
+    static ref GLOBAL_SCHEDULER: Spin<Scheduler> = Spin::new(Scheduler {
+        ready: VecDeque::new(),
+    });
+}
+
 impl Scheduler {
     /// `Scheduler` might be used in various places. Do not hold it for a long time.
     ///
@@ -38,7 +45,7 @@ impl Scheduler {
     ///
     /// Drop the lock before calling `schedule`.
     pub fn get() -> &'static Spin<Self> {
-        todo!()
+        &GLOBAL_SCHEDULER
     }
 
     pub fn current<'lt>() -> &'lt Arc<Thread> {
@@ -52,6 +59,13 @@ impl Scheduler {
     }
 
     pub(super) fn set_idle(thread: Arc<Thread>) {
+        thread.prepare_kernel_stack(|kstack| {
+            let mut writer = kstack.get_writer();
+            writer.flags = 0x200;
+            writer.entry = idle_task;
+            writer.finish();
+        });
+
         // TODO!!!: Set per cpu variable.
         unsafe { IDLE_TASK = Some(thread) };
     }

+ 7 - 8
src/kernel/task/signal.rs

@@ -4,12 +4,12 @@ use crate::{io::BufferFill, kernel::user::dataflow::UserBuffer, prelude::*};
 
 use alloc::collections::{binary_heap::BinaryHeap, btree_map::BTreeMap};
 use bindings::{
-    interrupt_stack, kill_current, mmx_registers, EFAULT, EINVAL, SA_RESTORER, SIGABRT, SIGBUS,
-    SIGCHLD, SIGCONT, SIGFPE, SIGILL, SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP,
-    SIGTTIN, SIGTTOU, SIGURG, SIGWINCH, SIGXCPU, SIGXFSZ,
+    interrupt_stack, mmx_registers, EFAULT, EINVAL, SA_RESTORER, SIGABRT, SIGBUS, SIGCHLD, SIGCONT,
+    SIGFPE, SIGILL, SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP, SIGTTIN, SIGTTOU,
+    SIGURG, SIGWINCH, SIGXCPU, SIGXFSZ,
 };
 
-use super::Thread;
+use super::{ProcessList, Thread};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Signal(u32);
@@ -385,11 +385,10 @@ impl SignalList {
 
 // TODO!!!: Should we use `uwake` or `iwake`?
 fn terminate_process(signal: Signal) -> ! {
-    unsafe { kill_current(signal.to_signum() as i32) };
+    ProcessList::kill_current(signal)
 }
 
+// TODO!!!!!!: Check exit status format.
 fn terminate_process_core_dump(signal: Signal) -> ! {
-    unsafe { kill_current(signal.to_signum() as i32 & 0x80) };
+    ProcessList::kill_current(signal)
 }
-
-fn schedule() {}

+ 46 - 55
src/kernel/task/thread.rs

@@ -24,6 +24,7 @@ use alloc::{
     sync::{Arc, Weak},
 };
 use bindings::{ECHILD, EINTR, EINVAL, EPERM, ESRCH};
+use lazy_static::lazy_static;
 
 use crate::kernel::vfs::filearray::FileArray;
 
@@ -221,7 +222,7 @@ pub struct ProcessList {
 
 impl Session {
     fn new(sid: u32, leader: Weak<Process>) -> Arc<Self> {
-        let session = Arc::new(Self {
+        Arc::new(Self {
             sid,
             leader,
             inner: Spin::new(SessionInner {
@@ -229,10 +230,7 @@ impl Session {
                 control_terminal: None,
                 groups: BTreeMap::new(),
             }),
-        });
-
-        ProcessList::get().add_session(&session);
-        session
+        })
     }
 
     fn add_member(&self, pgroup: &Arc<ProcessGroup>) {
@@ -270,15 +268,12 @@ impl Session {
 
 impl ProcessGroup {
     fn new_for_init(pgid: u32, leader: Weak<Process>, session: Weak<Session>) -> Arc<Self> {
-        let pgroup = Arc::new(Self {
+        Arc::new(Self {
             pgid,
             leader: leader.clone(),
             session,
             processes: Spin::new(BTreeMap::from([(pgid, leader)])),
-        });
-
-        ProcessList::get().add_pgroup(&pgroup);
-        pgroup
+        })
     }
 
     fn new(leader: &Arc<Process>, session: &Arc<Session>) -> Arc<Self> {
@@ -289,7 +284,6 @@ impl ProcessGroup {
             processes: Spin::new(BTreeMap::from([(leader.pid, Arc::downgrade(leader))])),
         });
 
-        ProcessList::get().add_pgroup(&pgroup);
         session.add_member(&pgroup);
         pgroup
     }
@@ -324,9 +318,38 @@ impl Drop for ProcessGroup {
     }
 }
 
+lazy_static! {
+    static ref GLOBAL_PROC_LIST: ProcessList = {
+        let init_process = Process::new_for_init(1, None);
+        let init_thread = Thread::new_for_init(b"[kernel kinit]".as_slice().into(), &init_process);
+        Scheduler::set_current(init_thread.clone());
+
+        let idle_process = Process::new_for_init(0, None);
+        let idle_thread =
+            Thread::new_for_init(b"[kernel idle#BS]".as_slice().into(), &idle_process);
+        Scheduler::set_idle(idle_thread.clone());
+
+        let init_session_weak = Arc::downgrade(&init_process.inner.lock().session);
+        let init_pgroup_weak = Arc::downgrade(&init_process.inner.lock().pgroup);
+
+        ProcessList {
+            sessions: Spin::new(BTreeMap::from([(1, init_session_weak)])),
+            pgroups: Spin::new(BTreeMap::from([(1, init_pgroup_weak)])),
+            threads: Spin::new(BTreeMap::from([
+                (1, init_thread.clone()),
+                (0, idle_thread.clone()),
+            ])),
+            processes: Spin::new(BTreeMap::from([
+                (1, Arc::downgrade(&init_process)),
+                (0, Arc::downgrade(&idle_process)),
+            ])),
+            init: init_process,
+        }
+    };
+}
 impl ProcessList {
     pub fn get() -> &'static Self {
-        todo!()
+        &GLOBAL_PROC_LIST
     }
 
     pub fn add_session(&self, session: &Arc<Session>) {
@@ -360,31 +383,6 @@ impl ProcessList {
         Scheduler::schedule_noreturn()
     }
 
-    fn new() -> Self {
-        let init_process = Process::new_for_init(1, None);
-        let init_thread = Thread::new_for_init(b"[kernel kinit]".as_slice().into(), &init_process);
-        Scheduler::set_current(init_thread.clone());
-
-        let idle_process = Process::new_for_init(0, None);
-        let idle_thread =
-            Thread::new_for_init(b"[kernel idle#BS]".as_slice().into(), &idle_process);
-        Scheduler::set_idle(idle_thread.clone());
-
-        Self {
-            sessions: Spin::new(BTreeMap::new()),
-            pgroups: Spin::new(BTreeMap::new()),
-            threads: Spin::new(BTreeMap::from([
-                (1, init_thread.clone()),
-                (0, idle_thread.clone()),
-            ])),
-            processes: Spin::new(BTreeMap::from([
-                (1, Arc::downgrade(&init_process)),
-                (0, Arc::downgrade(&idle_process)),
-            ])),
-            init: init_process,
-        }
-    }
-
     // TODO!!!!!!: Reconsider this
     fn remove(&self, tid: u32) {
         if let None = self.threads.lock().remove(&tid) {
@@ -557,7 +555,6 @@ impl Process {
             }
         });
 
-        ProcessList::get().add_process(&process);
         process.inner.lock().pgroup.add_member(&process);
         process
     }
@@ -635,8 +632,12 @@ impl Process {
             return Err(EPERM);
         }
         inner.session = Session::new(self.pid, Arc::downgrade(self));
+        ProcessList::get().add_session(&inner.session);
+
         inner.pgroup.remove_member(self.pid);
         inner.pgroup = ProcessGroup::new(self, &inner.session);
+        ProcessList::get().add_pgroup(&inner.pgroup);
+
         Ok(inner.pgroup.pgid)
     }
 
@@ -672,8 +673,11 @@ impl Process {
             }
 
             inner.session = Session::new(self.pid, Arc::downgrade(self));
+            ProcessList::get().add_session(&inner.session);
+
             inner.pgroup.remove_member(self.pid);
             inner.pgroup = ProcessGroup::new(self, &inner.session);
+            ProcessList::get().add_pgroup(&inner.pgroup);
         }
 
         Ok(())
@@ -772,7 +776,6 @@ impl Thread {
             }),
         });
 
-        ProcessList::get().add_thread(&thread);
         process.add_thread(&thread);
         thread
     }
@@ -961,22 +964,10 @@ impl Process {
     }
 }
 
-// TODO!!!!!!: impl this
-fn init_scheduler() {
-    let process_list = ProcessList::new();
+pub fn init_multitasking() {
+    // Lazy init
+    assert!(ProcessList::get().try_find_thread(1).is_some());
+
     Thread::current().load_interrupt_stack();
     Thread::current().process.mm_list.switch_page_table();
-
-    Scheduler::idle_task().prepare_kernel_stack(|kstack| {
-        let mut writer = kstack.get_writer();
-        writer.flags = 0x200;
-        writer.entry = idle_task;
-        writer.finish();
-    });
-}
-
-extern "C" fn idle_task() {
-    loop {
-        arch::task::halt();
-    }
 }

+ 1 - 65
src/kernel/terminal.rs

@@ -104,62 +104,10 @@ bitflags! {
 }
 
 /* c_cflag bit meaning */
-/* Common CBAUD rates */
-const B0: u32 = 0x00000000; /* hang up */
-const B50: u32 = 0x00000001;
-const B75: u32 = 0x00000002;
-const B110: u32 = 0x00000003;
-const B134: u32 = 0x00000004;
-const B150: u32 = 0x00000005;
-const B200: u32 = 0x00000006;
-const B300: u32 = 0x00000007;
-const B600: u32 = 0x00000008;
-const B1200: u32 = 0x00000009;
-const B1800: u32 = 0x0000000a;
-const B2400: u32 = 0x0000000b;
-const B4800: u32 = 0x0000000c;
-const B9600: u32 = 0x0000000d;
-const B19200: u32 = 0x0000000e;
 const B38400: u32 = 0x0000000f;
-const EXTA: u32 = B19200;
-const EXTB: u32 = B38400;
-
-const ADDRB: u32 = 0x20000000; /* address bit */
-const CMSPAR: u32 = 0x40000000; /* mark or space (stick) parity */
-const CRTSCTS: u32 = 0x80000000; /* flow control */
-
-const IBSHIFT: u32 = 16; /* Shift from CBAUD to CIBAUD */
-
-const CBAUD: u32 = 0x0000100f;
-const CSIZE: u32 = 0x00000030;
-const CS5: u32 = 0x00000000;
-const CS6: u32 = 0x00000010;
-const CS7: u32 = 0x00000020;
 const CS8: u32 = 0x00000030;
-const CSTOPB: u32 = 0x00000040;
 const CREAD: u32 = 0x00000080;
-const PARENB: u32 = 0x00000100;
-const PARODD: u32 = 0x00000200;
 const HUPCL: u32 = 0x00000400;
-const CLOCAL: u32 = 0x00000800;
-const CBAUDEX: u32 = 0x00001000;
-const BOTHER: u32 = 0x00001000;
-const B57600: u32 = 0x00001001;
-const B115200: u32 = 0x00001002;
-const B230400: u32 = 0x00001003;
-const B460800: u32 = 0x00001004;
-const B500000: u32 = 0x00001005;
-const B576000: u32 = 0x00001006;
-const B921600: u32 = 0x00001007;
-const B1000000: u32 = 0x00001008;
-const B1152000: u32 = 0x00001009;
-const B1500000: u32 = 0x0000100a;
-const B2000000: u32 = 0x0000100b;
-const B2500000: u32 = 0x0000100c;
-const B3000000: u32 = 0x0000100d;
-const B3500000: u32 = 0x0000100e;
-const B4000000: u32 = 0x0000100f;
-const CIBAUD: u32 = 0x100f0000; /* input baud rate */
 
 // line disciplines
 
@@ -260,10 +208,6 @@ macro_rules! CTRL {
 }
 
 impl Termios {
-    pub fn ctrl(c: u8) -> u8 {
-        c - 0x40
-    }
-
     pub fn veof(&self) -> u8 {
         self.cc[VEOF]
     }
@@ -288,14 +232,6 @@ impl Termios {
         self.cc[VSUSP]
     }
 
-    pub fn vstart(&self) -> u8 {
-        self.cc[VSTART]
-    }
-
-    pub fn vstop(&self) -> u8 {
-        self.cc[VSTOP]
-    }
-
     pub fn verase(&self) -> u8 {
         self.cc[VERASE]
     }
@@ -616,7 +552,7 @@ impl Terminal {
 
             if !inner.termio.icanon() {
                 let ch = inner.buffer.pop_front().unwrap();
-                buffer.fill(&[ch])?;
+                buffer.fill(&[ch])?.allow_partial();
                 break 'block;
             }
 

+ 5 - 2
src/kernel/timer.rs

@@ -1,8 +1,8 @@
 use core::sync::atomic::{AtomicUsize, Ordering};
 
-use crate::prelude::*;
+use crate::{prelude::*, sync::preempt};
 
-use super::interrupt::register_irq_handler;
+use super::{interrupt::register_irq_handler, task::Scheduler};
 
 static TICKS: AtomicUsize = AtomicUsize::new(0);
 
@@ -28,6 +28,9 @@ impl Ticks {
 
 fn timer_interrupt() {
     TICKS.fetch_add(1, Ordering::Relaxed);
+    if preempt::count() == 0 {
+        Scheduler::schedule();
+    }
 }
 
 pub fn ticks() -> Ticks {

+ 0 - 76
src/kernel/vfs.cpp

@@ -1,76 +0,0 @@
-#include <cstddef>
-
-#include <assert.h>
-#include <bits/alltypes.h>
-#include <errno.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/mount.h>
-#include <sys/types.h>
-
-#include <types/allocator.hpp>
-#include <types/path.hpp>
-
-#include <kernel/log.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-static fs::chrdev_ops** chrdevs[256];
-
-int fs::register_char_device(dev_t node, const fs::chrdev_ops& ops) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!chrdevs[major])
-        chrdevs[major] = new chrdev_ops* [256] {};
-
-    if (chrdevs[major][minor])
-        return -EEXIST;
-
-    chrdevs[major][minor] = new chrdev_ops{ops};
-    return 0;
-}
-
-ssize_t fs::char_device_read(dev_t node, char* buf, size_t buf_size, size_t n) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!chrdevs[major] || !chrdevs[major][minor])
-        return -EINVAL;
-
-    auto& read = chrdevs[major][minor]->read;
-    if (!read)
-        return -EINVAL;
-
-    return read(buf, buf_size, n);
-}
-
-ssize_t fs::char_device_write(dev_t node, const char* buf, size_t n) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!chrdevs[major] || !chrdevs[major][minor])
-        return -EINVAL;
-
-    auto& write = chrdevs[major][minor]->write;
-    if (!write)
-        return -EINVAL;
-
-    return write(buf, n);
-}
-
-extern "C" void r_dput(struct dentry* dentry);
-extern "C" struct dentry* r_dget(struct dentry* dentry);
-
-void fs::dentry_deleter::operator()(struct dentry* dentry) const {
-    if (dentry)
-        r_dput(dentry);
-}
-
-fs::dentry_pointer fs::d_get(const dentry_pointer& dp) {
-    if (!dp)
-        return nullptr;
-
-    return dentry_pointer{r_dget(dp.get())};
-}

+ 7 - 80
src/kernel/vfs/dentry.rs

@@ -9,14 +9,16 @@ use core::{
 use crate::{
     hash::KernelHasher,
     io::{Buffer, ByteBuffer},
-    kernel::block::BlockDevice,
+    kernel::{block::BlockDevice, CharDevice},
     path::{Path, PathComponent},
     prelude::*,
     rcu::{RCUNode, RCUPointer},
 };
 
 use alloc::sync::Arc;
-use bindings::{statx, EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, ERANGE, O_CREAT, O_EXCL};
+use bindings::{
+    statx, EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE, O_CREAT, O_EXCL,
+};
 
 use super::{
     inode::{Ino, Inode, Mode, WriteOffset},
@@ -178,13 +180,6 @@ impl Dentry {
             .map(|data| data.inode.clone())
     }
 
-    /// This function is used to get the **borrowed** dentry from a raw pointer
-    pub fn from_raw(raw: &*const Self) -> BorrowedArc<Self> {
-        assert!(!raw.is_null());
-
-        BorrowedArc::new(raw)
-    }
-
     pub fn is_directory(&self) -> bool {
         let data = self.data.load();
         data.as_ref()
@@ -359,27 +354,8 @@ impl Dentry {
                 Ok(device.read_some(offset, buffer)?.allow_partial())
             }
             mode if s_ischr(mode) => {
-                let devid = inode.devid()?;
-
-                // TODO!!!!!: change this
-                let mut temporary_buffer = [0u8; 256];
-
-                let ret = unsafe {
-                    bindings::fs::char_device_read(
-                        devid,
-                        temporary_buffer.as_mut_ptr() as *mut _,
-                        temporary_buffer.len(),
-                        temporary_buffer.len(),
-                    )
-                };
-
-                if ret < 0 {
-                    Err(-ret as u32)
-                } else {
-                    Ok(buffer
-                        .fill(&temporary_buffer[..ret as usize])?
-                        .allow_partial())
-                }
+                let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?;
+                device.read(buffer)
             }
             _ => Err(EINVAL),
         }
@@ -392,23 +368,7 @@ impl Dentry {
             mode if s_isdir(mode) => Err(EISDIR),
             mode if s_isreg(mode) => inode.write(buffer, offset),
             mode if s_isblk(mode) => Err(EINVAL), // TODO
-            mode if s_ischr(mode) => {
-                let devid = inode.devid()?;
-
-                let ret = unsafe {
-                    bindings::fs::char_device_write(
-                        devid,
-                        buffer.as_ptr() as *const _,
-                        buffer.len(),
-                    )
-                };
-
-                if ret < 0 {
-                    Err(-ret as u32)
-                } else {
-                    Ok(ret as usize)
-                }
-            }
+            mode if s_ischr(mode) => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(buffer),
             _ => Err(EINVAL),
         }
     }
@@ -464,36 +424,3 @@ impl Dentry {
         }
     }
 }
-
-#[no_mangle]
-pub extern "C" fn r_dget(dentry: *const Dentry) -> *const Dentry {
-    debug_assert!(!dentry.is_null());
-
-    unsafe { Arc::increment_strong_count(dentry) };
-    dentry
-}
-
-#[no_mangle]
-pub extern "C" fn r_dput(dentry: *const Dentry) {
-    debug_assert!(!dentry.is_null());
-
-    unsafe { Arc::from_raw(dentry) };
-}
-
-#[no_mangle]
-pub extern "C" fn r_dentry_is_directory(dentry: *const Dentry) -> bool {
-    let dentry = Dentry::from_raw(&dentry);
-
-    dentry
-        .data
-        .load()
-        .as_ref()
-        .map_or(false, |data| data.flags & D_DIRECTORY != 0)
-}
-
-#[no_mangle]
-pub extern "C" fn r_dentry_is_invalid(dentry: *const Dentry) -> bool {
-    let dentry = Dentry::from_raw(&dentry);
-
-    dentry.data.load().is_none()
-}

+ 0 - 22
src/kernel/vfs/ffi.rs

@@ -1,22 +0,0 @@
-use crate::io::RawBuffer;
-
-use super::{dentry::Dentry, inode::Inode};
-
-#[no_mangle]
-pub extern "C" fn fs_read(
-    file: *const Dentry, // borrowed
-    buf: *mut u8,
-    bufsize: usize,
-    offset: usize,
-    n: usize,
-) -> isize {
-    let file = Dentry::from_raw(&file);
-
-    let bufsize = bufsize.min(n);
-    let mut buffer = RawBuffer::new_from_raw(buf, bufsize);
-
-    match file.read(&mut buffer, offset) {
-        Ok(n) => n as isize,
-        Err(e) => -(e as isize),
-    }
-}

+ 0 - 30
src/kernel/vfs/filearray.rs

@@ -54,36 +54,6 @@ impl OpenFile {
     }
 }
 
-#[no_mangle]
-pub extern "C" fn r_filearray_new_for_init() -> *const FileArray {
-    Arc::into_raw(Arc::new(FileArray {
-        inner: Spin::new(FileArrayInner {
-            files: BTreeMap::new(),
-            fd_min_avail: 0,
-        }),
-    }))
-}
-
-#[no_mangle]
-pub extern "C" fn r_filearray_new_shared(other: *const FileArray) -> *const FileArray {
-    let other = BorrowedArc::from_raw(other);
-
-    Arc::into_raw(FileArray::new_shared(&other))
-}
-
-#[no_mangle]
-pub extern "C" fn r_filearray_new_cloned(other: *const FileArray) -> *const FileArray {
-    let other = BorrowedArc::from_raw(other);
-
-    Arc::into_raw(FileArray::new_cloned(&other))
-}
-
-#[no_mangle]
-pub extern "C" fn r_filearray_drop(other: *const FileArray) {
-    // SAFETY: `other` is a valid pointer from `Arc::into_raw()`.
-    unsafe { Arc::from_raw(other) };
-}
-
 impl FileArray {
     pub fn get_current<'lt>() -> &'lt Arc<Self> {
         &Thread::current().files

+ 0 - 1
src/kernel/vfs/mod.rs

@@ -8,7 +8,6 @@ use inode::Mode;
 use super::task::Thread;
 
 pub mod dentry;
-pub mod ffi;
 pub mod file;
 pub mod filearray;
 pub mod inode;

+ 10 - 28
src/kinit.cpp

@@ -1,4 +1,3 @@
-#include <assert.h>
 #include <stdint.h>
 #include <sys/utsname.h>
 
@@ -7,7 +6,6 @@
 
 #include <kernel/hw/acpi.hpp>
 #include <kernel/hw/pci.hpp>
-#include <kernel/hw/timer.hpp>
 #include <kernel/interrupt.hpp>
 #include <kernel/log.hpp>
 #include <kernel/mem/paging.hpp>
@@ -48,23 +46,6 @@ static inline void enable_sse() {
             : "rax");
 }
 
-void NORETURN real_kernel_init(mem::paging::pfn_t kernel_stack_pfn) {
-    // call global constructors
-    // NOTE: the initializer of global objects MUST NOT contain
-    // all kinds of memory allocations
-    for (auto* ctor = &start_ctors; ctor != &end_ctors; ++ctor)
-        (*ctor)();
-
-    init_interrupt();
-    hw::timer::init_pit();
-
-    hw::acpi::parse_acpi_tables();
-
-    init_pci();
-
-    init_scheduler(kernel_stack_pfn);
-}
-
 static inline void setup_early_kernel_page_table() {
     using namespace kernel::mem::paging;
 
@@ -81,12 +62,9 @@ static inline void setup_early_kernel_page_table() {
 
     // clear kernel bss
     memset((void*)BSS_ADDR, 0x00, BSS_LENGTH);
-
-    // clear empty page
-    memset(mem::physaddr<void>{(uintptr_t)EMPTY_PAGE_PFN}, 0x00, 0x1000);
 }
 
-extern "C" uintptr_t KIMAGE_PAGES_VALUE;
+extern "C" char KIMAGE_PAGES[];
 
 static inline void setup_buddy(uintptr_t addr_max) {
     using namespace kernel::mem;
@@ -97,6 +75,7 @@ static inline void setup_buddy(uintptr_t addr_max) {
     addr_max >>= 12;
     int count = (addr_max * sizeof(page) + 0x200000 - 1) / 0x200000;
 
+    auto KIMAGE_PAGES_VALUE = (size_t)KIMAGE_PAGES;
     pfn_t real_start_pfn = KERNEL_IMAGE_PADDR + KIMAGE_PAGES_VALUE * 0x1000;
     pfn_t aligned_start_pfn = real_start_pfn + 0x200000 - 1;
     aligned_start_pfn &= ~0x1fffff;
@@ -155,7 +134,7 @@ static inline void save_memory_info(bootloader_data* data) {
            sizeof(kernel::mem::info::e820_entries));
 }
 
-void setup_gdt() {
+static inline void setup_gdt() {
     // user code
     mem::gdt[3] = 0x0020'fa00'0000'0000;
     // user data
@@ -193,6 +172,8 @@ void setup_gdt() {
         : "ax", "memory");
 }
 
+extern "C" void rust_kinit(uintptr_t early_kstack_vaddr);
+
 extern "C" void NORETURN kernel_init(bootloader_data* data) {
     enable_sse();
 
@@ -217,12 +198,13 @@ extern "C" void NORETURN kernel_init(bootloader_data* data) {
 
     asm volatile(
         "mov %1, %%rdi\n\t"
-        "mov %2, %%rsp\n\t"
+        "lea -8(%2), %%rsp\n\t"
         "xor %%rbp, %%rbp\n\t"
-        "call *%0\n\t"
+        "mov %%rbp, (%%rsp)\n\t" // Clear previous frame pointer
+        "jmp *%0\n\t"
         :
-        : "r"(real_kernel_init), "g"(kernel_stack_pfn), "g"(kernel_stack_ptr)
-        :);
+        : "r"(rust_kinit), "g"(kernel_stack_pfn), "r"(kernel_stack_ptr)
+        : "memory");
 
     freeze();
 }

+ 129 - 57
src/lib.rs

@@ -11,6 +11,7 @@ extern crate alloc;
 mod bindings;
 
 mod driver;
+mod elf;
 mod fs;
 mod hash;
 mod io;
@@ -21,9 +22,14 @@ mod prelude;
 mod rcu;
 mod sync;
 
-use alloc::{ffi::CString, sync::Arc};
-use bindings::root::types::elf::{elf32_load, elf32_load_data};
+use alloc::ffi::CString;
+use elf::ParsedElf32;
 use kernel::{
+    mem::{
+        paging::Page,
+        phys::{CachedPP, PhysPtr as _},
+    },
+    task::{init_multitasking, Thread},
     vfs::{
         dentry::Dentry,
         mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY},
@@ -36,16 +42,32 @@ use prelude::*;
 
 #[panic_handler]
 fn panic(info: &core::panic::PanicInfo) -> ! {
-    println_fatal!("panicked at {:?}\n\t\t{}", info.location(), info.message());
+    if let Some(location) = info.location() {
+        println_fatal!(
+            "panicked at {}:{}:{}",
+            location.file(),
+            location.line(),
+            location.column()
+        );
+    } else {
+        println_fatal!("panicked at <UNKNOWN>");
+    }
+    println_fatal!();
+    println_fatal!("{}", info.message());
+
     arch::task::freeze()
 }
 
 extern "C" {
     fn _do_allocate(size: usize) -> *mut core::ffi::c_void;
     fn _do_deallocate(ptr: *mut core::ffi::c_void, size: core::ffi::c_size_t) -> i32;
+    fn init_pci();
 }
 
-use core::alloc::{GlobalAlloc, Layout};
+use core::{
+    alloc::{GlobalAlloc, Layout},
+    arch::{asm, global_asm},
+};
 
 struct Allocator {}
 unsafe impl GlobalAlloc for Allocator {
@@ -70,8 +92,56 @@ unsafe impl GlobalAlloc for Allocator {
 #[global_allocator]
 static ALLOCATOR: Allocator = Allocator {};
 
+global_asm!(
+    r"
+    .globl to_init_process
+    to_init_process:
+        push %rbp
+        mov %rbx, %rdi
+        jmp {}
+    ",
+    sym init_process,
+    options(att_syntax)
+);
+
+extern "C" {
+    fn to_init_process();
+}
+
 #[no_mangle]
-pub extern "C" fn late_init_rust(out_sp: *mut usize, out_ip: *mut usize) {
+pub extern "C" fn rust_kinit(early_kstack_pfn: usize) -> ! {
+    // We don't call global constructors.
+    // Rust doesn't need that, and we're not going to use global variables in C++.
+
+    kernel::interrupt::init().unwrap();
+
+    // TODO: Move this to rust.
+    unsafe { init_pci() };
+
+    kernel::vfs::mount::init_vfs().unwrap();
+
+    // We need root dentry to be present in constructor of `FsContext`.
+    // So call `init_vfs` first, then `init_multitasking`.
+    init_multitasking();
+    Thread::current().prepare_kernel_stack(|kstack| {
+        let mut writer = kstack.get_writer();
+        writer.entry = to_init_process;
+        writer.flags = 0x200;
+        writer.rbp = 0;
+        writer.rbx = early_kstack_pfn; // `to_init_process` arg
+        writer.finish();
+    });
+
+    arch::task::context_switch_light(
+        CachedPP::new(early_kstack_pfn).as_ptr(), // We will never come back
+        unsafe { Thread::current().get_sp_ptr() },
+    );
+    arch::task::freeze()
+}
+
+extern "C" fn init_process(early_kstack_pfn: usize) {
+    unsafe { Page::take_pfn(early_kstack_pfn, 9) };
+
     kernel::timer::init().unwrap();
 
     // Use the PIT timer for now.
@@ -83,66 +153,68 @@ pub extern "C" fn late_init_rust(out_sp: *mut usize, out_ip: *mut usize) {
     // We might want the serial initialized as soon as possible.
     driver::serial::init().unwrap();
 
-    kernel::vfs::mount::init_vfs().unwrap();
-
     driver::e1000e::register_e1000e_driver();
     driver::ahci::register_ahci_driver();
 
     fs::procfs::init();
     fs::fat32::init();
 
-    // mount fat32 /mnt directory
-    let fs_context = FsContext::get_current();
-    let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
-
-    mnt_dir.mkdir(0o755).unwrap();
-
-    do_mount(
-        &mnt_dir,
-        "/dev/sda",
-        "/mnt",
-        "fat32",
-        MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID,
-    )
-    .unwrap();
-
-    let init = Dentry::open(&fs_context, Path::new(b"/mnt/busybox").unwrap(), true)
-        .expect("kernel panic: init not found!");
-
-    let argv = vec![
-        CString::new("/mnt/busybox").unwrap(),
-        CString::new("sh").unwrap(),
-        CString::new("/mnt/initsh").unwrap(),
-    ];
-
-    let envp = vec![
-        CString::new("LANG=C").unwrap(),
-        CString::new("HOME=/root").unwrap(),
-        CString::new("PATH=/mnt").unwrap(),
-        CString::new("PWD=/").unwrap(),
-    ];
-
-    let argv_array = argv.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-    let envp_array = envp.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-
-    // load init
-    let mut load_data = elf32_load_data {
-        exec_dent: Arc::into_raw(init) as *mut _,
-        argv: argv_array.as_ptr(),
-        argv_count: argv_array.len(),
-        envp: envp_array.as_ptr(),
-        envp_count: envp_array.len(),
-        ip: 0,
-        sp: 0,
+    let (ip, sp) = {
+        // mount fat32 /mnt directory
+        let fs_context = FsContext::get_current();
+        let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
+
+        mnt_dir.mkdir(0o755).unwrap();
+
+        do_mount(
+            &mnt_dir,
+            "/dev/sda",
+            "/mnt",
+            "fat32",
+            MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID,
+        )
+        .unwrap();
+
+        let init = Dentry::open(&fs_context, Path::new(b"/mnt/busybox").unwrap(), true)
+            .expect("busybox should be present in /mnt");
+
+        let argv = vec![
+            CString::new("/mnt/busybox").unwrap(),
+            CString::new("sh").unwrap(),
+            CString::new("/mnt/initsh").unwrap(),
+        ];
+
+        let envp = vec![
+            CString::new("LANG=C").unwrap(),
+            CString::new("HOME=/root").unwrap(),
+            CString::new("PATH=/mnt").unwrap(),
+            CString::new("PWD=/").unwrap(),
+        ];
+
+        let elf = ParsedElf32::parse(init.clone()).unwrap();
+        elf.load(&Thread::current().process.mm_list, argv, envp)
+            .unwrap()
     };
 
-    let result = unsafe { elf32_load(&mut load_data) };
-    if result != 0 {
-        println_fatal!("Failed to load init: {}", result);
-    }
-
     unsafe {
-        *out_sp = load_data.sp;
-        *out_ip = load_data.ip;
+        asm!(
+            "mov %ax, %fs",
+            "mov %ax, %gs",
+            "mov ${ds}, %rax",
+            "mov %ax, %ds",
+            "mov %ax, %es",
+            "push ${ds}",
+            "push {sp}",
+            "push $0x200",
+            "push ${cs}",
+            "push {ip}",
+            "iretq",
+            ds = const 0x33,
+            cs = const 0x2b,
+            in("rax") 0,
+            ip = in(reg) ip.0,
+            sp = in(reg) sp.0,
+            options(att_syntax, noreturn),
+        );
     }
 }

+ 34 - 28
src/sync.rs

@@ -5,24 +5,44 @@ pub mod spin;
 pub mod strategy;
 
 pub mod preempt {
-    use core::sync::atomic::{compiler_fence, Ordering};
+    use core::sync::atomic::{compiler_fence, AtomicUsize, Ordering};
 
     /// TODO: This should be per cpu.
-    static mut PREEMPT_COUNT: usize = 0;
+    static PREEMPT_COUNT: AtomicUsize = AtomicUsize::new(0);
 
     #[inline(always)]
     pub fn disable() {
-        unsafe { PREEMPT_COUNT += 1 };
+        PREEMPT_COUNT.fetch_add(1, Ordering::Relaxed);
         compiler_fence(Ordering::SeqCst);
     }
 
     #[inline(always)]
     pub fn enable() {
         compiler_fence(Ordering::SeqCst);
-        unsafe { PREEMPT_COUNT -= 1 };
+        PREEMPT_COUNT.fetch_sub(1, Ordering::Relaxed);
+    }
+
+    #[inline(always)]
+    pub fn count() -> usize {
+        PREEMPT_COUNT.load(Ordering::Relaxed)
     }
 }
 
+#[no_mangle]
+pub extern "C" fn r_preempt_disable() {
+    preempt::disable();
+}
+
+#[no_mangle]
+pub extern "C" fn r_preempt_enable() {
+    preempt::enable();
+}
+
+#[no_mangle]
+pub extern "C" fn r_preempt_count() -> usize {
+    preempt::count()
+}
+
 pub type Spin<T> = lock::Lock<T, spin::SpinStrategy>;
 pub type Mutex<T> = lock::Lock<T, semaphore::SemaphoreStrategy<1>>;
 #[allow(dead_code)]
@@ -78,32 +98,18 @@ impl<T: Sized + Sync, U: ?Sized> Locked<T, U> {
 
 macro_rules! might_sleep {
     () => {
-        if cfg!(debug_assertions) {
-            if unsafe { $crate::bindings::root::kernel::async_::preempt_count() } != 0 {
-                println_fatal!("failed assertion");
-                unsafe { $crate::bindings::root::freeze() };
-            }
-        } else {
-            assert_eq!(
-                unsafe { $crate::bindings::root::kernel::async_::preempt_count() },
-                0,
-                "a might_sleep function called with preempt disabled"
-            );
-        }
+        assert_eq!(
+            $crate::sync::preempt::count(),
+            0,
+            "a might_sleep function called with preempt disabled"
+        );
     };
     ($n:expr) => {
-        if cfg!(debug_assertions) {
-            if unsafe { $crate::bindings::root::kernel::async_::preempt_count() } != $n {
-                println_fatal!("failed assertion");
-                unsafe { $crate::bindings::root::freeze() };
-            }
-        } else {
-            assert_eq!(
-                unsafe { $crate::bindings::root::kernel::async_::preempt_count() },
-                $n,
-                "a might_sleep function called with the preempt count not satisfying its requirement",
-            );
-        }
+        assert_eq!(
+            $crate::sync::preempt::count(),
+            $n,
+            "a might_sleep function called with the preempt count not satisfying its requirement",
+        );
     };
 }
 

+ 2 - 2
src/sync/condvar.rs

@@ -1,5 +1,5 @@
 use crate::{
-    kernel::task::{Scheduler, Thread, ThreadState},
+    kernel::task::{Scheduler, Thread},
     prelude::*,
     sync::preempt,
 };
@@ -7,7 +7,7 @@ use crate::{
 use super::{lock::Guard, strategy::LockStrategy};
 use alloc::{collections::vec_deque::VecDeque, sync::Arc};
 
-pub struct CondVar<const Interruptible: bool> {
+pub struct CondVar<const INTERRUPTIBLE: bool> {
     waiters: Spin<VecDeque<Arc<Thread>>>,
 }
 

+ 7 - 7
src/sync/lock.rs

@@ -96,7 +96,7 @@ impl<Value: ?Sized, Strategy: LockStrategy> Lock<Value, Strategy> {
     }
 }
 
-pub struct Guard<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool = true> {
+pub struct Guard<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool = true> {
     _phantom: core::marker::PhantomData<Strategy>,
     value: &'lock UnsafeCell<Value>,
     strategy_data: &'lock Strategy::StrategyData,
@@ -119,8 +119,8 @@ impl<'lock, Value: ?Sized, Strategy: LockStrategy> Guard<'lock, Value, Strategy>
     }
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> Deref
-    for Guard<'lock, Value, Strategy, Write>
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool> Deref
+    for Guard<'lock, Value, Strategy, WRITE>
 {
     type Target = Value;
 
@@ -137,8 +137,8 @@ impl<'lock, Value: ?Sized, Strategy: LockStrategy> DerefMut
     }
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> AsRef<Value>
-    for Guard<'lock, Value, Strategy, Write>
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool> AsRef<Value>
+    for Guard<'lock, Value, Strategy, WRITE>
 {
     fn as_ref(&self) -> &Value {
         unsafe { &*self.value.get() }
@@ -153,8 +153,8 @@ impl<'lock, Value: ?Sized, Strategy: LockStrategy> AsMut<Value>
     }
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> Drop
-    for Guard<'lock, Value, Strategy, Write>
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool> Drop
+    for Guard<'lock, Value, Strategy, WRITE>
 {
     fn drop(&mut self) {
         unsafe { Strategy::do_unlock(&self.strategy_data, &mut self.context) }

+ 0 - 180
src/types/elf.cpp

@@ -1,180 +0,0 @@
-#include <string>
-#include <vector>
-
-#include <errno.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <types/elf.hpp>
-
-#include <kernel/mem/mm_list.hpp>
-#include <kernel/mem/vm_area.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-static inline void __user_push32(uintptr_t* sp, uint32_t d) {
-    // TODO: use copy_to_user
-    *(--*(uint32_t**)sp) = d;
-}
-
-static inline void __user_push_string32(uintptr_t* sp, const char* str) {
-    size_t len = strlen(str);
-
-    *sp -= (len + 1);
-    *sp &= ~0xf; // align to 16 bytes
-
-    memcpy((void*)*sp, str, len + 1);
-}
-
-int types::elf::elf32_load(types::elf::elf32_load_data& d) {
-    auto exec = fs::dentry_pointer{d.exec_dent};
-    if (!exec)
-        return -ENOENT;
-
-    types::elf::elf32_header hdr{};
-    auto n_read = fs::fs_read(exec.get(), (char*)&hdr, sizeof(types::elf::elf32_header), 0,
-                              sizeof(types::elf::elf32_header));
-
-    if (n_read != sizeof(types::elf::elf32_header))
-        return -EINVAL;
-
-    if (hdr.magic[0] != 0x7f || hdr.magic[1] != 'E' || hdr.magic[2] != 'L' || hdr.magic[3] != 'F')
-        return -EINVAL;
-
-    size_t phents_size = hdr.phentsize * hdr.phnum;
-    size_t shents_size = hdr.shentsize * hdr.shnum;
-    std::vector<types::elf::elf32_program_header_entry> phents(hdr.phnum);
-    n_read = fs::fs_read(exec.get(), (char*)phents.data(), phents_size, hdr.phoff, phents_size);
-
-    // broken file or I/O error
-    if (n_read != phents_size)
-        return -EINVAL;
-
-    std::vector<types::elf::elf32_section_header_entry> shents(hdr.shnum);
-    n_read = fs::fs_read(exec.get(), (char*)shents.data(), shents_size, hdr.shoff, shents_size);
-
-    // broken file or I/O error
-    if (n_read != shents_size)
-        return -EINVAL;
-
-    // from now on, caller process is gone.
-    // so we can't just simply return to it on error.
-    auto& mms = current_process->mms;
-    mms.clear();
-
-    uintptr_t data_segment_end = 0;
-
-    for (const auto& phent : phents) {
-        if (phent.type != types::elf::elf32_program_header_entry::PT_LOAD)
-            continue;
-
-        auto vaddr = phent.vaddr & ~0xfff;
-        auto vlen = ((phent.vaddr + phent.memsz + 0xfff) & ~0xfff) - vaddr;
-        auto flen = ((phent.vaddr + phent.filesz + 0xfff) & ~0xfff) - vaddr;
-        auto fileoff = phent.offset & ~0xfff;
-
-        using namespace kernel::mem;
-        if (flen) {
-            mm_list::map_args args{};
-
-            args.vaddr = vaddr;
-            args.length = flen;
-            args.file = fs::d_get(exec);
-            args.file_offset = fileoff;
-
-            args.flags = MM_MAPPED;
-            if (phent.flags & elf32_program_header_entry::PF_W)
-                args.flags |= MM_WRITE;
-
-            if (phent.flags & elf32_program_header_entry::PF_X)
-                args.flags |= MM_EXECUTE;
-
-            if (auto ret = mms.mmap(args); ret != 0)
-                return ELF_LOAD_FAIL_NORETURN;
-        }
-
-        if (vlen > flen) {
-            mm_list::map_args args{};
-
-            args.vaddr = vaddr + flen;
-            args.length = vlen - flen;
-
-            args.flags = MM_ANONYMOUS;
-            if (phent.flags & elf32_program_header_entry::PF_W)
-                args.flags |= MM_WRITE;
-
-            if (phent.flags & elf32_program_header_entry::PF_X)
-                args.flags |= MM_EXECUTE;
-
-            if (auto ret = mms.mmap(args); ret != 0)
-                return ELF_LOAD_FAIL_NORETURN;
-        }
-
-        if (vaddr + vlen > data_segment_end)
-            data_segment_end = vaddr + vlen;
-    }
-
-    current_process->mms.register_brk(data_segment_end + 0x10000);
-
-    for (const auto& shent : shents) {
-        if (shent.sh_type == elf32_section_header_entry::SHT_NOBITS)
-            memset((char*)(uintptr_t)shent.sh_addr, 0x00, shent.sh_size);
-    }
-
-    // map stack area
-    if (1) {
-        using namespace kernel::mem;
-        mm_list::map_args args{};
-
-        args.vaddr = ELF32_STACK_TOP;
-        args.length = ELF32_STACK_SIZE;
-        args.flags = MM_ANONYMOUS | MM_WRITE;
-
-        if (auto ret = mms.mmap(args); ret != 0)
-            return ELF_LOAD_FAIL_NORETURN;
-    }
-
-    d.ip = hdr.entry;
-    d.sp = ELF32_STACK_BOTTOM;
-
-    auto* sp = &d.sp;
-
-    // fill information block area
-    std::vector<elf32_addr_t> args, envs;
-    for (size_t i = 0; i < d.envp_count; ++i) {
-        __user_push_string32(sp, d.envp[i]);
-        envs.push_back((uintptr_t)*sp);
-    }
-    for (size_t i = 0; i < d.argv_count; ++i) {
-        __user_push_string32(sp, d.argv[i]);
-        args.push_back((uintptr_t)*sp);
-    }
-
-    // push null auxiliary vector entry
-    __user_push32(sp, 0);
-    __user_push32(sp, 0);
-
-    // push 0 for envp
-    __user_push32(sp, 0);
-
-    // push envp
-    for (auto ent : envs)
-        __user_push32(sp, ent);
-
-    // push 0 for argv
-    __user_push32(sp, 0);
-
-    // push argv
-    for (int i = args.size() - 1; i >= 0; --i)
-        __user_push32(sp, args[i]);
-
-    // push argc
-    __user_push32(sp, args.size());
-
-    // TODO!!!: rename current thread
-    current_thread->name = "[thread]";
-
-    return 0;
-}

+ 2 - 4
src/types/libstdcpp.cpp

@@ -17,9 +17,7 @@ extern "C" void NORETURN __cxa_pure_virtual(void) {
         ;
 }
 
-void NORETURN __assert_fail(const char* statement, const char* file, int line,
-                            const char* func) {
-    kmsgf("Kernel assertion failed: (%s), %s:%d, %s", statement, file, line,
-          func);
+void NORETURN __assert_fail(const char* statement, const char* file, int line, const char* func) {
+    (void)statement, (void)file, (void)line, (void)func;
     freeze();
 }