Browse Source

move interrupt and vm to rust

greatbridf, 11 tháng trước
mục cha
commit
fca6223938
64 tập tin đã thay đổi với 1271 bổ sung và 1888 xóa
  1. 10 19
      CMakeLists.txt
  2. 4 0
      Cargo.toml
  3. 2 0
      arch/src/lib.rs
  4. 14 0
      arch/x86_64/src/interrupt.rs
  5. 14 1
      arch/x86_64/src/lib.rs
  6. 24 27
      arch/x86_64/src/task.rs
  7. 0 1
      build.rs
  8. 0 16
      include/kernel/async/lock.hpp
  9. 0 6
      include/kernel/hw/pci.hpp
  10. 0 5
      include/kernel/interrupt.hpp
  11. 0 11
      include/kernel/irq.hpp
  12. 0 112
      include/kernel/mem/mm_list.hpp
  13. 0 2
      include/kernel/mem/paging_asm.h
  14. 0 60
      include/kernel/mem/vm_area.hpp
  15. 0 13
      include/kernel/process.hpp
  16. 0 60
      include/kernel/vfs.hpp
  17. 0 26
      include/kernel/vfs/dentry.hpp
  18. 0 18
      include/kernel/vfs/vfsfwd.hpp
  19. 0 295
      include/types/elf.hpp
  20. 1 1
      src/asm/interrupt.s
  21. 2 1
      src/boot.s
  22. 4 4
      src/driver.rs
  23. 2 0
      src/driver/timer.rs
  24. 370 0
      src/elf.rs
  25. 1 1
      src/fs/procfs.rs
  26. 8 0
      src/io.rs
  27. 33 28
      src/kernel.ld
  28. 1 1
      src/kernel.rs
  29. 7 29
      src/kernel/async/lock.cc
  30. 3 0
      src/kernel/console.rs
  31. 2 0
      src/kernel/constants.rs
  32. 2 1
      src/kernel/hw/pci.cc
  33. 0 142
      src/kernel/interrupt.cpp
  34. 118 6
      src/kernel/interrupt.rs
  35. 2 2
      src/kernel/mem.rs
  36. 2 2
      src/kernel/mem/mm_area.rs
  37. 45 17
      src/kernel/mem/mm_list.rs
  38. 202 0
      src/kernel/mem/mm_list/page_fault.rs
  39. 53 39
      src/kernel/mem/page_table.rs
  40. 0 146
      src/kernel/mem/paging.cc
  41. 36 12
      src/kernel/mem/paging.rs
  42. 1 90
      src/kernel/process.cpp
  43. 2 11
      src/kernel/syscall.rs
  44. 15 4
      src/kernel/syscall/mm.rs
  45. 23 30
      src/kernel/syscall/procops.rs
  46. 2 2
      src/kernel/task.rs
  47. 1 1
      src/kernel/task/kstack.rs
  48. 15 1
      src/kernel/task/scheduler.rs
  49. 7 8
      src/kernel/task/signal.rs
  50. 46 55
      src/kernel/task/thread.rs
  51. 1 65
      src/kernel/terminal.rs
  52. 5 2
      src/kernel/timer.rs
  53. 0 76
      src/kernel/vfs.cpp
  54. 7 80
      src/kernel/vfs/dentry.rs
  55. 0 22
      src/kernel/vfs/ffi.rs
  56. 0 30
      src/kernel/vfs/filearray.rs
  57. 0 1
      src/kernel/vfs/mod.rs
  58. 10 28
      src/kinit.cpp
  59. 129 57
      src/lib.rs
  60. 34 28
      src/sync.rs
  61. 2 2
      src/sync/condvar.rs
  62. 7 7
      src/sync/lock.rs
  63. 0 180
      src/types/elf.cpp
  64. 2 4
      src/types/libstdcpp.cpp

+ 10 - 19
CMakeLists.txt

@@ -41,31 +41,23 @@ set(BOOTLOADER_SOURCES src/boot.s
 set(KERNEL_MAIN_SOURCES src/kinit.cpp
                         src/kernel/async/lock.cc
                         src/kernel/allocator.cc
-                        src/kernel/interrupt.cpp
                         src/kernel/process.cpp
                         src/kernel/mem/paging.cc
                         src/kernel/mem/slab.cc
-                        src/kernel/vfs.cpp
                         src/kernel/vga.cpp
                         src/kernel/hw/acpi.cc
                         src/kernel/hw/pci.cc
                         src/net/ethernet.cc
                         src/types/crc.cc
-                        src/types/elf.cpp
                         src/types/libstdcpp.cpp
                         include/defs.hpp
                         include/kernel/async/lock.hpp
                         include/kernel/interrupt.hpp
-                        include/kernel/irq.hpp
                         include/kernel/process.hpp
-                        include/kernel/mem/mm_list.hpp
                         include/kernel/mem/paging.hpp
                         include/kernel/mem/slab.hpp
                         include/kernel/mem/types.hpp
-                        include/kernel/mem/vm_area.hpp
                         include/kernel/utsname.hpp
-                        include/kernel/vfs.hpp
-                        include/kernel/vfs/dentry.hpp
                         include/kernel/vga.hpp
                         include/kernel/task/forward.hpp
                         include/kernel/hw/acpi.hpp
@@ -77,7 +69,6 @@ set(KERNEL_MAIN_SOURCES src/kinit.cpp
                         include/net/netdev.hpp
                         include/types/bitmap.hpp
                         include/types/buffer.hpp
-                        include/types/elf.hpp
                         include/types/list.hpp
                         include/types/types.h
                         include/types/allocator.hpp
@@ -112,16 +103,16 @@ add_custom_target(boot.img
     DEPENDS user_space_programs
     COMMAND dd if=mbr_hole.bin of=boot.img
     COMMAND dd if=/dev/zero of=boot.img bs=`expr 512 \\* 1024 \\* 1024` count=0 seek=1
-    COMMAND sh -c \"echo n\; echo\; echo\; echo\; echo\; echo a\; echo w\" | ${FDISK_BIN} boot.img
-    COMMAND mkfs.fat --offset=2048 -v -n SYSTEM boot.img
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/hello-world.out ::hello
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/interrupt-test.out ::int
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/stack-test.out ::stack
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/init.out ::init
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_BINARY_DIR}/user-space-program/priv-test.out ::priv
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_SOURCE_DIR}/busybox-minimal ::busybox_
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_SOURCE_DIR}/busybox ::busybox
-    COMMAND mcopy -i boot.img@@1M ${CMAKE_SOURCE_DIR}/init_script.sh ::initsh
+    COMMAND sh -c \"echo n\; echo\; echo \; echo 8192\; echo\; echo a\; echo w\" | ${FDISK_BIN} boot.img
+    COMMAND mkfs.fat --offset=8192 -v -n SYSTEM boot.img
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/hello-world.out ::hello
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/interrupt-test.out ::int
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/stack-test.out ::stack
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/init.out ::init
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_BINARY_DIR}/user-space-program/priv-test.out ::priv
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/busybox-minimal ::busybox_
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/busybox ::busybox
+    COMMAND mcopy -i boot.img@@4M ${CMAKE_SOURCE_DIR}/init_script.sh ::initsh
 )
 
 add_custom_command(OUTPUT run

+ 4 - 0
Cargo.toml

@@ -19,6 +19,10 @@ bindgen = "0.70.1"
 [profile.dev]
 panic = "abort"
 
+[profile.dev.package.core]
+opt-level = 2
+debug = true
+
 [profile.dev.package."*"]
 opt-level = 2
 debug = false

+ 2 - 0
arch/src/lib.rs

@@ -90,3 +90,5 @@ pub mod io {
         x86_64::io::outl(port, data)
     }
 }
+
+pub use x86_64;

+ 14 - 0
arch/x86_64/src/interrupt.rs

@@ -11,3 +11,17 @@ pub fn disable() {
         asm!("cli");
     }
 }
+
+pub fn lidt(base: usize, limit: u16) {
+    let mut idt_descriptor = [0u16; 5];
+
+    idt_descriptor[0] = limit;
+    idt_descriptor[1] = base as u16;
+    idt_descriptor[2] = (base >> 16) as u16;
+    idt_descriptor[3] = (base >> 32) as u16;
+    idt_descriptor[4] = (base >> 48) as u16;
+
+    unsafe {
+        asm!("lidt ({})", in(reg) &idt_descriptor, options(att_syntax));
+    }
+}

+ 14 - 1
arch/x86_64/src/lib.rs

@@ -43,12 +43,25 @@ pub mod vm {
     pub fn set_cr3(pfn: usize) {
         unsafe {
             asm!(
-                "mov %cr3, {0}",
+                "mov {0}, %cr3",
                 in(reg) pfn,
                 options(att_syntax)
             );
         }
     }
+
+    #[inline(always)]
+    pub fn get_cr2() -> usize {
+        let cr2: usize;
+        unsafe {
+            asm!(
+                "mov %cr2, {}",
+                out(reg) cr2,
+                options(att_syntax)
+            );
+        }
+        cr2
+    }
 }
 
 pub mod interrupt;

+ 24 - 27
arch/x86_64/src/task.rs

@@ -1,5 +1,7 @@
 use core::arch::{asm, global_asm};
 
+use crate::interrupt;
+
 #[inline(always)]
 pub fn halt() {
     unsafe {
@@ -17,10 +19,7 @@ pub fn pause() {
 #[inline(always)]
 pub fn freeze() -> ! {
     loop {
-        unsafe {
-            asm!("cli", options(att_syntax, nostack));
-        }
-
+        interrupt::disable();
         halt();
     }
 }
@@ -28,13 +27,13 @@ pub fn freeze() -> ! {
 global_asm!(
     r"
     .macro movcfi reg, offset
-    	mov \reg, \offset(%rsp)
-    	.cfi_rel_offset \reg, \offset
+        mov \reg, \offset(%rsp)
+        .cfi_rel_offset \reg, \offset
     .endm
 
     .macro movrst reg, offset
-    	mov \offset(%rsp), \reg
-    	.cfi_restore \reg
+        mov \offset(%rsp), \reg
+        .cfi_restore \reg
     .endm
 
     .globl __context_switch_light
@@ -45,40 +44,38 @@ global_asm!(
         pushf
     .cfi_def_cfa_offset 0x10
 
-	    sub $0x38, %rsp  # extra 8 bytes to align to 16 bytes
+        sub $0x38, %rsp  # extra 8 bytes to align to 16 bytes
     .cfi_def_cfa_offset 0x48
 
-	    movcfi %rbx, 0x08
-	    movcfi %rbp, 0x10
-	    movcfi %r12, 0x18
-	    movcfi %r13, 0x20
-	    movcfi %r14, 0x28
-	    movcfi %r15, 0x30
+        movcfi %rbx, 0x08
+        movcfi %rbp, 0x10
+        movcfi %r12, 0x18
+        movcfi %r13, 0x20
+        movcfi %r14, 0x28
+        movcfi %r15, 0x30
 
-        push (%rdi) 	 # save sp of previous stack frame of current
-	                     # acts as saving bp
+        push (%rdi)      # save sp of previous stack frame of current
+                         # acts as saving bp
     .cfi_def_cfa_offset 0x50
 
         mov %rsp, (%rdi) # save sp of current stack
         mov (%rsi), %rsp # load sp of target stack
 
         pop (%rsi)       # load sp of previous stack frame of target
-	                     # acts as restoring previous bp
+                         # acts as restoring previous bp
     .cfi_def_cfa_offset 0x48
 
-	    pop %rax         # align to 16 bytes
+        pop %rax         # align to 16 bytes
     .cfi_def_cfa_offset 0x40
 
-	    call after_ctx_switch
-
-	    mov 0x28(%rsp), %r15
-	    mov 0x20(%rsp), %r14
-	    mov 0x18(%rsp), %r13
-	    mov 0x10(%rsp), %r12
-	    mov 0x08(%rsp), %rbp
+        mov 0x28(%rsp), %r15
+        mov 0x20(%rsp), %r14
+        mov 0x18(%rsp), %r13
+        mov 0x10(%rsp), %r12
+        mov 0x08(%rsp), %rbp
         mov 0x00(%rsp), %rbx
 
-	    add $0x30, %rsp
+        add $0x30, %rsp
     .cfi_def_cfa_offset 0x10
 
         popf

+ 0 - 1
build.rs

@@ -5,7 +5,6 @@ fn main() {
     let headers = [
         "include/kernel/process.hpp",
         "include/kernel/hw/pci.hpp",
-        "include/types/elf.hpp",
     ];
 
     let bindings = bindgen::Builder::default()

+ 0 - 16
include/kernel/async/lock.hpp

@@ -8,11 +8,6 @@ namespace kernel::async {
 
 using spinlock_t = unsigned long volatile;
 using lock_context_t = unsigned long;
-using preempt_count_t = ssize_t;
-
-void preempt_disable();
-void preempt_enable();
-preempt_count_t preempt_count();
 
 void init_spinlock(spinlock_t& lock);
 
@@ -38,17 +33,6 @@ class mutex {
     void unlock_irq(lock_context_t state);
 };
 
-class lock_guard {
-   private:
-    mutex& m_mtx;
-
-   public:
-    explicit inline lock_guard(mutex& mtx) : m_mtx{mtx} { m_mtx.lock(); }
-    lock_guard(const lock_guard&) = delete;
-
-    inline ~lock_guard() { m_mtx.unlock(); }
-};
-
 class lock_guard_irq {
    private:
     mutex& m_mtx;

+ 0 - 6
include/kernel/hw/pci.hpp

@@ -9,12 +9,6 @@
 
 #include <kernel/mem/phys.hpp>
 
-namespace kernel::kinit {
-
-void init_pci();
-
-} // namespace kernel::kinit
-
 namespace kernel::hw::pci {
 
 struct PACKED device_header_base {

+ 0 - 5
include/kernel/interrupt.hpp

@@ -36,8 +36,3 @@ struct interrupt_stack {
 struct mmx_registers {
     uint8_t data[512]; // TODO: list of content
 };
-
-namespace kernel::kinit {
-void init_interrupt();
-
-} // namespace kernel::kinit

+ 0 - 11
include/kernel/irq.hpp

@@ -1,11 +0,0 @@
-#pragma once
-
-#include <functional>
-
-namespace kernel::irq {
-
-using irq_handler_t = std::function<void()>;
-
-void register_handler(int irqno, irq_handler_t handler);
-
-}; // namespace kernel::irq

+ 0 - 112
include/kernel/mem/mm_list.hpp

@@ -1,112 +0,0 @@
-#pragma once
-
-#include "paging.hpp"
-#include "vm_area.hpp"
-
-#include <set>
-
-#include <stdint.h>
-
-#include <kernel/vfs/dentry.hpp>
-
-namespace kernel::mem {
-
-constexpr uintptr_t KERNEL_SPACE_START = 0x8000000000000000ULL;
-constexpr uintptr_t USER_SPACE_MEMORY_TOP = 0x0000800000000000ULL;
-constexpr uintptr_t MMAP_MIN_ADDR = 0x0000000000001000ULL;
-constexpr uintptr_t STACK_MIN_ADDR = 0x0000700000000000ULL;
-
-class mm_list {
-   private:
-    struct comparator {
-        constexpr bool operator()(const vm_area& lhs,
-                                  const vm_area& rhs) const noexcept {
-            return lhs < rhs;
-        }
-        constexpr bool operator()(const vm_area& lhs,
-                                  uintptr_t rhs) const noexcept {
-            return lhs < rhs;
-        }
-        constexpr bool operator()(uintptr_t lhs,
-                                  const vm_area& rhs) const noexcept {
-            return lhs < rhs;
-        }
-    };
-
-   public:
-    using list_type = std::set<vm_area, comparator>;
-    using iterator = list_type::iterator;
-    using const_iterator = list_type::const_iterator;
-
-    struct map_args {
-        // MUSE BE aligned to 4kb boundary
-        uintptr_t vaddr;
-        // MUSE BE aligned to 4kb boundary
-        std::size_t length;
-
-        unsigned long flags;
-
-        fs::dentry_pointer file;
-        // MUSE BE aligned to 4kb boundary
-        std::size_t file_offset;
-    };
-
-   private:
-    list_type m_areas;
-    paging::pfn_t m_pt;
-    iterator m_brk{};
-
-   public:
-    // default constructor copies kernel_mms
-    explicit mm_list();
-    // copies kernel_mms and mirrors user space
-    explicit mm_list(const mm_list& other);
-
-    constexpr mm_list(mm_list&& v)
-        : m_areas(std::move(v.m_areas))
-        , m_pt(std::exchange(v.m_pt, 0))
-        , m_brk{std::move(v.m_brk)} {}
-
-    ~mm_list();
-
-    void switch_pd() const noexcept;
-
-    int register_brk(uintptr_t addr);
-    uintptr_t set_brk(uintptr_t addr);
-
-    void clear();
-
-    // split the memory block at the specified address
-    // return: iterator to the new block
-    iterator split(iterator area, uintptr_t at);
-
-    bool is_avail(uintptr_t addr) const;
-    bool is_avail(uintptr_t start, std::size_t length) const noexcept;
-
-    uintptr_t find_avail(uintptr_t hint, size_t length) const;
-
-    int unmap(iterator area, bool should_invalidate_tlb);
-    int unmap(uintptr_t start, std::size_t length, bool should_invalidate_tlb);
-
-    int mmap(const map_args& args);
-
-    constexpr vm_area* find(uintptr_t lp) {
-        auto iter = m_areas.find(lp);
-        if (iter == m_areas.end())
-            return nullptr;
-        return &iter;
-    }
-
-    constexpr const vm_area* find(uintptr_t lp) const {
-        auto iter = m_areas.find(lp);
-        if (iter == m_areas.end())
-            return nullptr;
-        return &iter;
-    }
-
-    constexpr paging::PSE get_page_table() const noexcept {
-        return paging::PSE{m_pt};
-    }
-};
-
-} // namespace kernel::mem

+ 0 - 2
include/kernel/mem/paging_asm.h

@@ -1,4 +1,3 @@
-
 #define KERNEL_IMAGE_PADDR         0x400000
 #define KERNEL_STAGE1_PADDR        0x001000
 #define KERNEL_PML4                0x002000
@@ -7,7 +6,6 @@
 #define KERNEL_PD_KIMAGE           0x005000
 #define KERNEL_PT_KIMAGE           0x006000
 #define KERNEL_PD_STRUCT_PAGE_ARR  0x007000
-#define EMPTY_PAGE_PFN             0x008000
 
 #define KERNEL_BSS_HUGE_PAGE       0x200000
 

+ 0 - 60
include/kernel/mem/vm_area.hpp

@@ -1,60 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-namespace kernel::mem {
-
-constexpr unsigned long MM_WRITE = 0x00000000'00000001;
-constexpr unsigned long MM_EXECUTE = 0x00000000'00000002;
-constexpr unsigned long MM_MAPPED = 0x00000000'00000004;
-constexpr unsigned long MM_ANONYMOUS = 0x00000000'00000008;
-constexpr unsigned long MM_INTERNAL_MASK = 0xffffffff'00000000;
-constexpr unsigned long MM_BREAK = 0x80000000'00000000;
-
-struct vm_area {
-    uintptr_t start;
-    uintptr_t end;
-
-    unsigned long flags;
-
-    fs::dentry_pointer mapped_file;
-    std::size_t file_offset;
-
-    constexpr bool is_avail(uintptr_t ostart, uintptr_t oend) const noexcept {
-        return (ostart >= end || oend <= start);
-    }
-
-    constexpr bool operator<(const vm_area& rhs) const noexcept { return end <= rhs.start; }
-    constexpr bool operator<(uintptr_t rhs) const noexcept { return end <= rhs; }
-    friend constexpr bool operator<(uintptr_t lhs, const vm_area& rhs) noexcept {
-        return lhs < rhs.start;
-    }
-
-    constexpr vm_area(uintptr_t start, unsigned long flags, uintptr_t end,
-                      fs::dentry_pointer mapped_file = nullptr, std::size_t offset = 0)
-        : start{start}
-        , end{end}
-        , flags{flags}
-        , mapped_file{std::move(mapped_file)}
-        , file_offset{offset} {}
-
-    constexpr vm_area(uintptr_t start, unsigned long flags,
-                      fs::dentry_pointer mapped_file = nullptr, std::size_t offset = 0)
-        : start{start}
-        , end{start}
-        , flags{flags}
-        , mapped_file{std::move(mapped_file)}
-        , file_offset{offset} {}
-
-    inline vm_area(const vm_area& other)
-        : start{other.start}
-        , end{other.end}
-        , flags{other.flags}
-        , mapped_file{d_get(other.mapped_file)}
-        , file_offset{other.file_offset} {}
-};
-
-} // namespace kernel::mem

+ 0 - 13
include/kernel/process.hpp

@@ -1,11 +1,5 @@
 #pragma once
 
-#include <list>
-#include <map>
-#include <set>
-#include <tuple>
-#include <utility>
-
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -19,14 +13,7 @@
 #include <types/types.h>
 
 #include <kernel/interrupt.hpp>
-#include <kernel/mem/mm_list.hpp>
 #include <kernel/mem/paging.hpp>
 #include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-void NORETURN init_scheduler(kernel::mem::paging::pfn_t kernel_stack_pfn);
-/// @return true if returned normally, false if being interrupted
-void NORETURN schedule_noreturn(void);
 
 void NORETURN freeze(void);
-void NORETURN kill_current(int signo);

+ 0 - 60
include/kernel/vfs.hpp

@@ -5,11 +5,6 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include <types/path.hpp>
-
-#include <kernel/mem/paging.hpp>
-#include <kernel/vfs/dentry.hpp>
-
 #define NODE_MAJOR(node) (((node) >> 8) & 0xFFU)
 #define NODE_MINOR(node) ((node) & 0xFFU)
 
@@ -19,59 +14,4 @@ constexpr dev_t make_device(uint32_t major, uint32_t minor) {
     return ((major << 8) & 0xFF00U) | (minor & 0xFFU);
 }
 
-// buf, buf_size, cnt
-using chrdev_read = std::function<ssize_t(char*, std::size_t, std::size_t)>;
-
-// buf, cnt
-using chrdev_write = std::function<ssize_t(const char*, std::size_t)>;
-
-struct chrdev_ops {
-    chrdev_read read;
-    chrdev_write write;
-};
-
-int register_char_device(dev_t node, const chrdev_ops& ops);
-ssize_t char_device_read(dev_t node, char* buf, size_t buf_size, size_t n);
-ssize_t char_device_write(dev_t node, const char* buf, size_t n);
-
-class rust_file_array {
-   public:
-    struct handle;
-
-   private:
-    struct handle* m_handle;
-
-   public:
-    rust_file_array(struct handle* handle);
-    rust_file_array(const rust_file_array&) = delete;
-    ~rust_file_array();
-
-    constexpr rust_file_array(rust_file_array&& other) noexcept
-        : m_handle(std::exchange(other.m_handle, nullptr)) {}
-
-    struct handle* get() const;
-    void drop();
-};
-
-class rust_fs_context {
-   public:
-    struct handle;
-
-   private:
-    struct handle* m_handle;
-
-   public:
-    rust_fs_context(struct handle* handle);
-    rust_fs_context(const rust_fs_context&) = delete;
-    ~rust_fs_context();
-
-    constexpr rust_fs_context(rust_fs_context&& other) noexcept
-        : m_handle(std::exchange(other.m_handle, nullptr)) {}
-
-    struct handle* get() const;
-    void drop();
-};
-
-extern "C" size_t fs_read(struct dentry* file, char* buf, size_t buf_size, size_t offset, size_t n);
-
 } // namespace fs

+ 0 - 26
include/kernel/vfs/dentry.hpp

@@ -1,26 +0,0 @@
-#pragma once
-
-#include <string>
-
-#include <bits/alltypes.h>
-
-#include <types/path.hpp>
-
-#include <kernel/async/lock.hpp>
-
-struct dentry;
-
-namespace fs {
-
-struct rust_vfs_handle {
-    void* data[2];
-};
-
-struct dentry_deleter {
-    void operator()(struct dentry* dentry) const;
-};
-
-using dentry_pointer = std::unique_ptr<struct dentry, dentry_deleter>;
-dentry_pointer d_get(const dentry_pointer& dp);
-
-} // namespace fs

+ 0 - 18
include/kernel/vfs/vfsfwd.hpp

@@ -1,18 +0,0 @@
-#pragma once
-
-namespace fs {
-
-// in dentry.hpp
-struct dcache;
-struct dentry;
-
-// in filearray.hpp
-class file_array;
-
-// in inode.hpp
-struct inode;
-
-// in vfs.hpp
-class vfs;
-
-} // namespace fs

+ 0 - 295
include/types/elf.hpp

@@ -1,295 +0,0 @@
-#pragma once
-
-#include <vector>
-
-#include <stdint.h>
-
-#include <kernel/interrupt.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-namespace types::elf {
-
-using elf32_addr_t = uint32_t;
-using elf32_off_t = uint32_t;
-
-using elf64_addr_t = uint64_t;
-using elf64_off_t = uint64_t;
-
-constexpr elf32_addr_t ELF32_STACK_BOTTOM = 0xbffff000;
-constexpr elf32_off_t ELF32_STACK_SIZE = 8 * 1024 * 1024;
-constexpr elf32_addr_t ELF32_STACK_TOP = ELF32_STACK_BOTTOM - ELF32_STACK_SIZE;
-
-constexpr int ELF_LOAD_FAIL_NORETURN = 0x114514;
-
-struct PACKED elf32_header {
-    // 0x7f, "ELF"
-    char magic[4];
-
-    enum : uint8_t {
-        FORMAT_32 = 1,
-        FORMAT_64 = 2,
-    } format;
-    enum : uint8_t {
-        ENDIAN_LITTLE = 1,
-        ENDIAN_BIG = 2,
-    } endian;
-    // should be 1
-    uint8_t _version1;
-    enum : uint8_t {
-        ABI_SYSTEM_V = 0x00,
-        // TODO:
-        ABI_LINUX = 0x03,
-    } abi;
-    uint8_t abi_version;
-    uint8_t _reserved[7];
-    enum : uint16_t {
-        ET_NONE = 0x00,
-        ET_REL = 0x01,
-        ET_EXEC = 0x02,
-        ET_DYN = 0x03,
-        ET_CORE = 0x04,
-        ET_LOOS = 0xfe00,
-        ET_HIOS = 0xfeff,
-        ET_LOPROC = 0xff00,
-        ET_HIPROC = 0xffff,
-    } type;
-    enum : uint16_t {
-        ARCH_NONE = 0x00,
-        ARCH_X86 = 0x03,
-        ARCH_ARM = 0x28,
-        ARCH_IA64 = 0x32,
-        ARCH_X86_64 = 0x3e,
-        ARCH_ARM64 = 0xb7,
-        ARCH_RISCV = 0xf3,
-    } arch;
-    // should be 1
-    uint32_t _version2;
-    // entry address
-    elf32_addr_t entry;
-    // program header table offset
-    elf32_off_t phoff;
-    // section header table offset
-    elf32_off_t shoff;
-    // architecture dependent flags
-    uint32_t flags;
-    // elf header size
-    uint16_t ehsize;
-    // program header table entry size
-    uint16_t phentsize;
-    // program header table entries number
-    uint16_t phnum;
-    // section header table entry size
-    uint16_t shentsize;
-    // section header table entries number
-    uint16_t shnum;
-    // section header table entry index that contains section names
-    uint16_t shstrndx;
-};
-
-struct PACKED elf32_program_header_entry {
-    enum : uint32_t {
-        PT_NULL = 0x00,
-        PT_LOAD = 0x01,
-        PT_DYNAMIC = 0x02,
-        PT_INTERP = 0x03,
-        PT_NOTE = 0x04,
-        PT_SHLIB = 0x05,
-        PT_PHDR = 0x06,
-        PT_TLS = 0x07,
-        PT_LOOS = 0x60000000,
-        PT_HIOS = 0x6fffffff,
-        PT_LIPROC = 0x70000000,
-        PT_HIPROC = 0x7fffffff,
-    } type;
-    elf32_off_t offset;
-    elf32_addr_t vaddr;
-    elf32_addr_t paddr;
-    elf32_off_t filesz;
-    elf32_off_t memsz;
-    // segment dependent
-    enum : uint32_t {
-        PF_X = 0x1,
-        PF_W = 0x2,
-        PF_R = 0x4,
-    } flags;
-    // 0 and 1 for no alignment, otherwise power of 2
-    uint32_t align;
-};
-
-struct PACKED elf32_section_header_entry {
-    elf32_off_t sh_name;
-    enum : uint32_t {
-        SHT_NULL = 0x00,
-        SHT_PROGBITS = 0x01,
-        SHT_RELA = 0x04,
-        SHT_DYNAMIC = 0x06,
-        SHT_NOTE = 0x07,
-        SHT_NOBITS = 0x08,
-        SHT_REL = 0x09,
-        SHT_DYNSYM = 0x0b,
-        SHT_INIT_ARRAY = 0x0e,
-        SHT_FINI_ARRAY = 0x0f,
-        SHT_PREINIT_ARRAY = 0x0f,
-    } sh_type;
-    enum : uint32_t {
-        SHF_WRITE = 0x01,
-        SHF_ALLOC = 0x02,
-        SHF_EXECINSTR = 0x04,
-    } sh_flags;
-    elf32_addr_t sh_addr;
-    elf32_off_t sh_offset;
-    elf32_off_t sh_size;
-    uint32_t sh_link;
-    uint32_t sh_info;
-    elf32_off_t sh_addralign;
-    elf32_off_t sh_entsize;
-};
-
-struct elf32_load_data {
-    struct dentry* exec_dent; // Owned
-    const char* const* argv;
-    size_t argv_count;
-    const char* const* envp;
-    size_t envp_count;
-    uintptr_t ip;
-    uintptr_t sp;
-};
-
-// TODO: environment variables
-int elf32_load(elf32_load_data& data);
-
-struct PACKED elf64_header {
-    // 0x7f, "ELF"
-    char magic[4];
-
-    enum : uint8_t {
-        FORMAT_32 = 1,
-        FORMAT_64 = 2,
-    } format;
-    enum : uint8_t {
-        ENDIAN_LITTLE = 1,
-        ENDIAN_BIG = 2,
-    } endian;
-    // should be 1
-    uint8_t _version1;
-    enum : uint8_t {
-        ABI_SYSTEM_V = 0x00,
-        // TODO:
-        ABI_LINUX = 0x03,
-    } abi;
-    uint8_t abi_version;
-    uint8_t _reserved[7];
-    enum : uint16_t {
-        ET_NONE = 0x00,
-        ET_REL = 0x01,
-        ET_EXEC = 0x02,
-        ET_DYN = 0x03,
-        ET_CORE = 0x04,
-        ET_LOOS = 0xfe00,
-        ET_HIOS = 0xfeff,
-        ET_LOPROC = 0xff00,
-        ET_HIPROC = 0xffff,
-    } type;
-    enum : uint16_t {
-        ARCH_NONE = 0x00,
-        ARCH_X86 = 0x03,
-        ARCH_ARM = 0x28,
-        ARCH_IA64 = 0x32,
-        ARCH_X86_64 = 0x3e,
-        ARCH_ARM64 = 0xb7,
-        ARCH_RISCV = 0xf3,
-    } arch;
-    // should be 1
-    uint32_t _version2;
-    // entry address
-    elf64_addr_t entry;
-    // program header table offset
-    elf64_off_t phoff;
-    // section header table offset
-    elf64_off_t shoff;
-    // architecture dependent flags
-    uint32_t flags;
-    // elf header size
-    uint16_t ehsize;
-    // program header table entry size
-    uint16_t phentsize;
-    // program header table entries number
-    uint16_t phnum;
-    // section header table entry size
-    uint16_t shentsize;
-    // section header table entries number
-    uint16_t shnum;
-    // section header table entry index that contains section names
-    uint16_t shstrndx;
-};
-
-struct PACKED elf64_program_header_entry {
-    enum : uint32_t {
-        PT_NULL = 0x00,
-        PT_LOAD = 0x01,
-        PT_DYNAMIC = 0x02,
-        PT_INTERP = 0x03,
-        PT_NOTE = 0x04,
-        PT_SHLIB = 0x05,
-        PT_PHDR = 0x06,
-        PT_TLS = 0x07,
-        PT_LOOS = 0x60000000,
-        PT_HIOS = 0x6fffffff,
-        PT_LIPROC = 0x70000000,
-        PT_HIPROC = 0x7fffffff,
-    } type;
-    // segment dependent
-    enum : uint32_t {
-        PF_X = 0x1,
-        PF_W = 0x2,
-        PF_R = 0x4,
-    } flags;
-    elf64_off_t offset;
-    elf64_addr_t vaddr;
-    elf64_addr_t paddr;
-    elf64_off_t filesz;
-    elf64_off_t memsz;
-    // 0 and 1 for no alignment, otherwise power of 2
-    uint64_t align;
-};
-
-struct PACKED elf64_section_header_entry {
-    uint32_t sh_name;
-    enum : uint32_t {
-        SHT_NULL = 0x00,
-        SHT_PROGBITS = 0x01,
-        SHT_RELA = 0x04,
-        SHT_DYNAMIC = 0x06,
-        SHT_NOTE = 0x07,
-        SHT_NOBITS = 0x08,
-        SHT_REL = 0x09,
-        SHT_DYNSYM = 0x0b,
-        SHT_INIT_ARRAY = 0x0e,
-        SHT_FINI_ARRAY = 0x0f,
-        SHT_PREINIT_ARRAY = 0x0f,
-    } sh_type;
-    enum : uint64_t {
-        SHF_WRITE = 0x01,
-        SHF_ALLOC = 0x02,
-        SHF_EXECINSTR = 0x04,
-    } sh_flags;
-    elf64_addr_t sh_addr;
-    elf64_off_t sh_offset;
-    elf64_off_t sh_size;
-    uint32_t sh_link;
-    uint32_t sh_info;
-    elf64_off_t sh_addralign;
-    elf64_off_t sh_entsize;
-};
-
-struct elf64_load_data {
-    fs::dentry_pointer exec_dent;
-    std::vector<std::string> argv;
-    std::vector<std::string> envp;
-    unsigned long ip;
-    unsigned long sp;
-};
-
-} // namespace types::elf

+ 1 - 1
src/asm/interrupt.s

@@ -33,8 +33,8 @@
 	.cfi_restore \reg
 .endm
 
-.extern after_ctx_switch
 .globl ISR_stub_restore
+.type ISR_stub_restore @function
 
 ISR_stub:
 	.cfi_startproc

+ 2 - 1
src/boot.s

@@ -295,7 +295,8 @@ fill_pxe:
 
 .section .text
 start_64bit:
-    # set stack pointer and clear stack bottom
+    # We map the first 1GB identically to the first 1GB of physical memory,
+    # move sp to the correct position in identically mapped area of kernel space.
     mov %rsp, %rdi
     xor %rsp, %rsp
     inc %rsp

+ 4 - 4
src/driver.rs

@@ -4,20 +4,20 @@ pub mod serial;
 pub mod timer;
 
 // TODO!!!: Put it somewhere else.
-pub(self) struct Port8 {
+pub struct Port8 {
     no: u16,
 }
 
 impl Port8 {
-    const fn new(no: u16) -> Self {
+    pub const fn new(no: u16) -> Self {
         Self { no }
     }
 
-    fn read(&self) -> u8 {
+    pub fn read(&self) -> u8 {
         arch::io::inb(self.no)
     }
 
-    fn write(&self, data: u8) {
+    pub fn write(&self, data: u8) {
         arch::io::outb(self.no, data)
     }
 }

+ 2 - 0
src/driver/timer.rs

@@ -4,6 +4,7 @@ const COUNT: Port8 = Port8::new(0x40);
 const CONTROL: Port8 = Port8::new(0x43);
 
 pub fn init() {
+    arch::interrupt::disable();
     // Set interval
     CONTROL.write(0x34);
 
@@ -11,4 +12,5 @@ pub fn init() {
     // 0x2e9a = 11930 = 100Hz
     COUNT.write(0x9a);
     COUNT.write(0x2e);
+    arch::interrupt::enable();
 }

+ 370 - 0
src/elf.rs

@@ -0,0 +1,370 @@
+use alloc::{ffi::CString, sync::Arc};
+use bitflags::bitflags;
+
+use crate::{
+    io::{RawBuffer, UninitBuffer},
+    kernel::{
+        constants::ENOEXEC,
+        mem::{FileMapping, MMList, Mapping, Permission, VAddr},
+        task::Thread,
+        user::{dataflow::CheckedUserPointer, UserPointerMut},
+        vfs::dentry::Dentry,
+    },
+    prelude::*,
+};
+
+/// File class byte of the ELF header (`Elf32Header::format`), stored as one byte.
+#[repr(u8)]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfFormat {
+    /// 32-bit object file.
+    Elf32 = 1,
+    /// 64-bit object file.
+    Elf64 = 2,
+}
+
+/// Byte order byte of the ELF header (`Elf32Header::endian`), stored as one byte.
+#[repr(u8)]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfEndian {
+    /// Little-endian data encoding.
+    Little = 1,
+    /// Big-endian data encoding.
+    Big = 2,
+}
+
+/// OS ABI byte of the ELF header (`Elf32Header::abi`), stored as one byte.
+///
+/// NOTE(review): only the value `3` has a variant (`SystemV = 0` is commented
+/// out), yet `ParsedElf32::parse` reads this byte straight from the file into
+/// the enum. Confirm that files carrying any other ABI byte are handled soundly.
+#[repr(u8)]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfABI {
+    // SystemV = 0,
+    Linux = 3,
+}
+
+/// Object file type of the ELF header (`Elf32Header::elf_type`), stored as a `u16`.
+#[repr(u16)]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfType {
+    /// Relocatable object file.
+    Relocatable = 1,
+    /// Executable file.
+    Executable = 2,
+    /// Shared object / dynamically linked file.
+    Dynamic = 3,
+    /// Core dump.
+    Core = 4,
+}
+
+/// Target machine of the ELF header (`Elf32Header::arch`), stored as a `u16`.
+///
+/// Only the machines listed here have a variant.
+#[repr(u16)]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ElfArch {
+    X86 = 0x03,
+    Arm = 0x28,
+    IA64 = 0x32,
+    X86_64 = 0x3e,
+    AArch64 = 0xb7,
+    RiscV = 0xf3,
+}
+
+bitflags! {
+    /// Permission bits of a program header (`Elf32PhEntry::flags`).
+    ///
+    /// `ParsedElf32::load` reads `Write` and `Exec` to build the mapping
+    /// permission of each loaded segment.
+    #[derive(Default, Clone, Copy)]
+    pub struct Elf32PhFlags: u32 {
+        const Exec = 1;
+        const Write = 2;
+        const Read = 4;
+    }
+
+    /// Attribute bits of a section header (`Elf32ShEntry::flags`).
+    #[derive(Default, Clone, Copy)]
+    pub struct Elf32ShFlags: u32 {
+        const Write = 1;
+        const Alloc = 2;
+        const Exec = 4;
+        /// Mask of the bits reserved for processor-specific use.
+        const MaskProc = 0xf0000000;
+    }
+}
+
+/// Segment type of a program header (`Elf32PhEntry::ph_type`).
+///
+/// The enum is a field of the `#[repr(C)]` struct `Elf32PhEntry`, which is
+/// filled directly from file bytes and whose size is compared against the
+/// header's `ph_entry_size`. `#[repr(u32)]` pins the field to the 4-byte
+/// on-disk width instead of leaving the layout up to the compiler.
+///
+/// NOTE(review): values without a variant (anything between the `Lo*` / `Hi*`
+/// bounds, for instance) are still read straight into this enum by
+/// `ParsedElf32::parse`. Confirm that such files are handled soundly.
+#[repr(u32)]
+#[derive(Default, Clone, Copy, PartialEq, Eq)]
+pub enum Elf32PhType {
+    #[default]
+    Null = 0,
+    /// Loadable segment; the only type `ParsedElf32::load` maps.
+    Load = 1,
+    Dynamic = 2,
+    Interp = 3,
+    Note = 4,
+    Shlib = 5,
+    Phdr = 6,
+    Tls = 7,
+    /// Lower bound of the OS-specific range.
+    Loos = 0x60000000,
+    /// Upper bound of the OS-specific range.
+    Hios = 0x6fffffff,
+    /// Lower bound of the processor-specific range.
+    Loproc = 0x70000000,
+    /// Upper bound of the processor-specific range.
+    Hiproc = 0x7fffffff,
+}
+
+/// Section type of a section header (`Elf32ShEntry::sh_type`).
+///
+/// The enum is a field of the `#[repr(C)]` struct `Elf32ShEntry`, which is
+/// filled directly from file bytes and whose size is compared against the
+/// header's `sh_entry_size`. `#[repr(u32)]` pins the field to the 4-byte
+/// on-disk width instead of leaving the layout up to the compiler.
+///
+/// NOTE(review): values without a variant are still read straight into this
+/// enum by `ParsedElf32::parse`. Confirm that such files are handled soundly.
+#[repr(u32)]
+#[derive(Default, Clone, Copy, PartialEq, Eq)]
+pub enum Elf32ShType {
+    #[default]
+    Null = 0,
+    ProgBits = 1,
+    SymTab = 2,
+    StrTab = 3,
+    Rela = 4,
+    Hash = 5,
+    Dynamic = 6,
+    Note = 7,
+    NoBits = 8,
+    Rel = 9,
+    Shlib = 10,
+    DynSym = 11,
+    InitArray = 14,
+    FiniArray = 15,
+    PreInitArray = 16,
+    Group = 17,
+    SymTabShndx = 18,
+    /// Lower bound of the OS-specific range.
+    Loos = 0x60000000,
+    /// Upper bound of the OS-specific range.
+    Hios = 0x6fffffff,
+    /// Lower bound of the processor-specific range.
+    Loproc = 0x70000000,
+    /// Upper bound of the processor-specific range.
+    Hiproc = 0x7fffffff,
+}
+
+/// On-disk layout of the 32-bit ELF file header.
+///
+/// The struct is `#[repr(C, packed)]` and is read byte-for-byte from offset 0
+/// of the file by `ParsedElf32::parse`, so field order and widths must not
+/// change. `Elf32Header::check_valid` compares `eh_size` with the size of this
+/// struct.
+#[repr(C, packed)]
+#[derive(Clone, Copy)]
+pub struct Elf32Header {
+    /// ELF magic number: 0x7f, "ELF"
+    pub magic: [u8; 4],
+    pub format: ElfFormat,
+    pub endian: ElfEndian,
+    /// ELF version, should be 1
+    pub version: u8,
+    pub abi: ElfABI,
+    pub abi_version: u8,
+    /// Unused bytes that pad the identification area to 16 bytes.
+    padding: [u8; 7],
+    pub elf_type: ElfType,
+    pub arch: ElfArch,
+    /// ELF version, should be 1
+    pub version2: u32,
+    /// Virtual address of the program entry point.
+    pub entry: u32,
+    /// File offset of the program header table.
+    pub ph_offset: u32,
+    /// File offset of the section header table.
+    pub sh_offset: u32,
+    pub flags: u32,
+    /// Size of this header in bytes.
+    pub eh_size: u16,
+    /// Size of one program header table entry in bytes.
+    pub ph_entry_size: u16,
+    /// Number of program header table entries.
+    pub ph_entry_count: u16,
+    /// Size of one section header table entry in bytes.
+    pub sh_entry_size: u16,
+    /// Number of section header table entries.
+    pub sh_entry_count: u16,
+    /// Index of the section header that holds the section name strings.
+    pub sh_str_index: u16,
+}
+
+/// On-disk layout of one 32-bit program header table entry.
+///
+/// `ParsedElf32::parse` fills a `Vec` of these directly from file bytes, so
+/// field order and widths must not change.
+#[repr(C)]
+#[derive(Default, Clone, Copy)]
+pub struct Elf32PhEntry {
+    pub ph_type: Elf32PhType,
+    /// File offset of the segment contents.
+    pub offset: u32,
+    /// Virtual address the segment is loaded at.
+    pub vaddr: u32,
+    pub paddr: u32,
+    /// Number of bytes of the segment present in the file.
+    pub file_size: u32,
+    /// Number of bytes the segment occupies in memory.
+    pub mem_size: u32,
+    pub flags: Elf32PhFlags,
+    /// `0` and `1` for no alignment, otherwise power of `2`
+    pub align: u32,
+}
+
+/// On-disk layout of one 32-bit section header table entry.
+///
+/// `ParsedElf32::parse` fills a `Vec` of these directly from file bytes, so
+/// field order and widths must not change.
+#[repr(C)]
+#[derive(Default, Clone, Copy)]
+pub struct Elf32ShEntry {
+    /// Offset of the section name (presumably into the section name string table).
+    pub name_offset: u32,
+    pub sh_type: Elf32ShType,
+    pub flags: Elf32ShFlags,
+    pub addr: u32,
+    pub offset: u32,
+    pub size: u32,
+    pub link: u32,
+    pub info: u32,
+    pub addr_align: u32,
+    pub entry_size: u32,
+}
+
+/// A 32-bit ELF file whose header and header tables have been read and checked.
+///
+/// Built by `ParsedElf32::parse` and consumed by `ParsedElf32::load`.
+pub struct ParsedElf32 {
+    /// Entry point address taken from the ELF header.
+    entry: u32,
+    /// The file the segments are mapped from.
+    file: Arc<Dentry>,
+    /// All program header table entries.
+    phents: Vec<Elf32PhEntry>,
+    /// All section header table entries.
+    shents: Vec<Elf32ShEntry>,
+}
+
+/// The four bytes every ELF file starts with: `0x7f` followed by "ELF".
+const ELF_MAGIC: [u8; 4] = *b"\x7fELF";
+
+impl Elf32Header {
+    /// Whether this header describes a file the loader accepts.
+    ///
+    /// Checks the magic number, both version fields, and that the header and
+    /// table entry sizes recorded in the file equal the sizes of the structs
+    /// they are read into.
+    fn check_valid(&self) -> bool {
+        let magic_ok = self.magic == ELF_MAGIC;
+        let version_ok = self.version == 1 && self.version2 == 1;
+        let sizes_ok = self.eh_size as usize == size_of::<Elf32Header>()
+            && self.ph_entry_size as usize == size_of::<Elf32PhEntry>()
+            && self.sh_entry_size as usize == size_of::<Elf32ShEntry>();
+
+        magic_ok && version_ok && sizes_ok
+    }
+}
+
+impl ParsedElf32 {
+    /// Read and validate the ELF header, then read both header tables.
+    ///
+    /// Returns `ENOEXEC` when the header could not be read in full, fails
+    /// `Elf32Header::check_valid`, or either table is shorter than the header
+    /// claims. Errors from `file.read` are passed through.
+    pub fn parse(file: Arc<Dentry>) -> KResult<Self> {
+        let mut header_buf = UninitBuffer::<Elf32Header>::new();
+        file.read(&mut header_buf, 0)?;
+
+        let header = match header_buf.assume_init() {
+            Some(header) if header.check_valid() => header,
+            _ => return Err(ENOEXEC),
+        };
+
+        let ph_count = header.ph_entry_count as usize;
+        let sh_count = header.sh_entry_count as usize;
+
+        // TODO: Use `UninitBuffer` for `phents` and `shents`.
+        let mut phents = vec![Elf32PhEntry::default(); ph_count];
+        let ph_bytes = file.read(
+            &mut RawBuffer::new_from_slice(phents.as_mut_slice()),
+            header.ph_offset as usize,
+        )?;
+        // A short read means the table is truncated.
+        if ph_bytes != ph_count * size_of::<Elf32PhEntry>() {
+            return Err(ENOEXEC);
+        }
+
+        let mut shents = vec![Elf32ShEntry::default(); sh_count];
+        let sh_bytes = file.read(
+            &mut RawBuffer::new_from_slice(shents.as_mut_slice()),
+            header.sh_offset as usize,
+        )?;
+        if sh_bytes != sh_count * size_of::<Elf32ShEntry>() {
+            return Err(ENOEXEC);
+        }
+
+        let entry = header.entry;
+        Ok(Self {
+            entry,
+            file,
+            phents,
+            shents,
+        })
+    }
+
+    /// Load the ELF file into memory and build the initial user stack.
+    ///
+    /// The user space of `mm_list` is cleared first, so a failure after that
+    /// point cannot be rolled back to the previous address space.
+    ///
+    /// Steps, in order:
+    /// 1. Map every `Load` program header: the page-rounded file-backed part
+    ///    as `Mapping::File`, and whatever `mem_size` needs beyond it as
+    ///    `Mapping::Anonymous`.
+    /// 2. Register the program break `0x10000` bytes past the highest mapped
+    ///    segment end.
+    /// 3. Map an 8MB anonymous stack that ends at `0xc0000000`.
+    /// 4. With the page table of `mm_list` switched in, push the argument and
+    ///    environment strings, then write `argc`, the argument pointers, the
+    ///    environment pointers and a null auxiliary vector entry.
+    ///
+    /// The page table of the current process is switched back in after step 4
+    /// whether that step succeeded or failed.
+    ///
+    /// # Return
+    /// `(entry_ip, sp)`
+    pub fn load(
+        self,
+        mm_list: &MMList,
+        args: Vec<CString>,
+        envs: Vec<CString>,
+    ) -> KResult<(VAddr, VAddr)> {
+        mm_list.clear_user();
+
+        let mut data_segment_end = VAddr(0);
+        for phent in self
+            .phents
+            .into_iter()
+            .filter(|ent| ent.ph_type == Elf32PhType::Load)
+        {
+            let vaddr_start = VAddr(phent.vaddr as usize);
+            let vmem_vaddr_end = vaddr_start + phent.mem_size as usize;
+            let load_vaddr_end = vaddr_start + phent.file_size as usize;
+
+            // Mappings are made in whole pages: round the start down and the ends up.
+            let vaddr = vaddr_start.floor();
+            let vmem_len = vmem_vaddr_end.ceil() - vaddr;
+            let file_len = load_vaddr_end.ceil() - vaddr;
+            let file_offset = phent.offset as usize & !0xfff;
+
+            let permission = Permission {
+                write: phent.flags.contains(Elf32PhFlags::Write),
+                execute: phent.flags.contains(Elf32PhFlags::Exec),
+            };
+
+            if file_len != 0 {
+                // Bytes actually backed by the file, counted from the page-aligned start.
+                let real_file_length = load_vaddr_end - vaddr;
+                mm_list.mmap_fixed(
+                    vaddr,
+                    file_len,
+                    Mapping::File(FileMapping::new(
+                        self.file.clone(),
+                        file_offset,
+                        real_file_length,
+                    )),
+                    permission,
+                )?;
+            }
+
+            // Pages needed in memory beyond the file-backed ones are anonymous.
+            if vmem_len > file_len {
+                mm_list.mmap_fixed(
+                    vaddr + file_len,
+                    vmem_len - file_len,
+                    Mapping::Anonymous,
+                    permission,
+                )?;
+            }
+
+            if vaddr + vmem_len > data_segment_end {
+                data_segment_end = vaddr + vmem_len;
+            }
+        }
+
+        mm_list.register_break(data_segment_end + 0x10000);
+
+        // Map stack area
+        mm_list.mmap_fixed(
+            VAddr(0xc0000000 - 0x800000), // Stack bottom is at 0xc0000000
+            0x800000,                     // 8MB stack size
+            Mapping::Anonymous,
+            Permission {
+                write: true,
+                execute: false,
+            },
+        )?;
+
+        // TODO!!!!!: A temporary workaround.
+        mm_list.switch_page_table();
+
+        // The stack setup runs in a closure so that an early `?` return cannot
+        // skip switching the page table back below.
+        let setup_stack = || -> KResult<u32> {
+            let mut sp = 0xc0000000u32;
+            let arg_addrs = args
+                .into_iter()
+                .map(|arg| push_string(&mut sp, arg))
+                .collect::<Vec<_>>();
+
+            let env_addrs = envs
+                .into_iter()
+                .map(|env| push_string(&mut sp, env))
+                .collect::<Vec<_>>();
+
+            let longs = 2 // Null auxiliary vector entry
+                + env_addrs.len() + 1 // Envs + null
+                + arg_addrs.len() + 1 // Args + null
+                + 1; // argc
+
+            sp -= longs as u32 * 4;
+            sp &= !0xf; // Align to 16 bytes
+
+            let mut cursor = (0..longs)
+                .map(|idx| UserPointerMut::<u32>::new_vaddr(sp as usize + size_of::<u32>() * idx));
+
+            // argc
+            cursor.next().unwrap()?.write(arg_addrs.len() as u32)?;
+
+            // args
+            for arg_addr in arg_addrs.into_iter() {
+                cursor.next().unwrap()?.write(arg_addr)?;
+            }
+            cursor.next().unwrap()?.write(0)?; // null
+
+            // envs
+            for env_addr in env_addrs.into_iter() {
+                cursor.next().unwrap()?.write(env_addr)?;
+            }
+            cursor.next().unwrap()?.write(0)?; // null
+
+            // Null auxiliary vector
+            cursor.next().unwrap()?.write(0)?; // AT_NULL
+            cursor.next().unwrap()?.write(0)?; // AT_NULL
+
+            // Every reserved slot must have been written.
+            assert!(cursor.next().is_none());
+            Ok(sp)
+        };
+        let stack_result = setup_stack();
+
+        // TODO!!!!!: A temporary workaround.
+        Thread::current().process.mm_list.switch_page_table();
+
+        let sp = stack_result?;
+        Ok((VAddr(self.entry as usize), VAddr(sp as usize)))
+    }
+}
+
+/// Copy `string`, including its terminating NUL, onto the user stack below `*sp`.
+///
+/// The destination is `*sp` minus the byte length, rounded down to a multiple
+/// of 4. `*sp` is updated to that address, which is also returned.
+///
+/// Panics if the destination is rejected by `CheckedUserPointer::new` or the
+/// write fails.
+fn push_string(sp: &mut u32, string: CString) -> u32 {
+    let bytes = string.as_bytes_with_nul();
+    let len = bytes.len();
+    let target = (*sp - len as u32) & !0x3; // Align to 4 bytes
+
+    CheckedUserPointer::new(target as *const u8, len)
+        .unwrap()
+        .write(bytes.as_ptr() as _, len)
+        .unwrap();
+
+    *sp = target;
+    target
+}

+ 1 - 1
src/fs/procfs.rs

@@ -69,7 +69,7 @@ impl ProcFsNode {
 }
 
 define_struct_inode! {
-    struct FileInode {
+    pub struct FileInode {
         file: Box<dyn ProcFsFile>,
     }
 }

+ 8 - 0
src/io.rs

@@ -82,6 +82,14 @@ impl<'lt, T: Copy + Sized> UninitBuffer<'lt, T> {
 
         Ok(unsafe { self.data.assume_init_ref() })
     }
+
+    /// Consume the buffer and return the value by copy.
+    ///
+    /// Returns `None` unless `self.buffer.filled()` reports that the buffer has
+    /// been filled.
+    pub fn assume_init(self) -> Option<T> {
+        if !self.buffer.filled() {
+            return None;
+        }
+
+        let value = unsafe { *self.data.assume_init() };
+        Some(value)
+    }
 }
 
 impl<'lt, T: Copy + Sized> Buffer for UninitBuffer<'lt, T> {

+ 33 - 28
src/kernel.ld

@@ -33,6 +33,7 @@ SECTIONS
     .text :
         AT(LOADADDR(.stage1) + SIZEOF(.stage1))
     {
+        KIMAGE_START = .;
         TEXT_START = .;
         *(.text)
         *(.text*)
@@ -57,15 +58,19 @@ SECTIONS
         end_ctors = .;
 
         . = ALIGN(16);
-        FIX_START = .;
+        _fix_start = .;
         KEEP(*(.fix));
-        FIX_END = .;
+        _fix_end = .;
 
         . = ALIGN(16);
         BSS_ADDR = .;
         QUAD(ABSOLUTE(BSS_START));
         BSS_LENGTH = .;
         QUAD(BSS_END - BSS_START);
+        FIX_START = .;
+        QUAD(ABSOLUTE(_fix_start));
+        FIX_END = .;
+        QUAD(ABSOLUTE(_fix_end));
 
         . = ALIGN(0x1000);
         RODATA_END = .;
@@ -113,42 +118,42 @@ SECTIONS
     } > KIMAGE
 
     /* Stabs debugging sections.  */
-    .stab          0 : { *(.stab) }
-    .stabstr       0 : { *(.stabstr) }
-    .stab.excl     0 : { *(.stab.excl) }
-    .stab.exclstr  0 : { *(.stab.exclstr) }
-    .stab.index    0 : { *(.stab.index) }
-    .stab.indexstr 0 : { *(.stab.indexstr) }
-    .comment       0 : { *(.comment) }
+    .stab          0 : { KEEP(*(.stab)); }
+    .stabstr       0 : { KEEP(*(.stabstr)); }
+    .stab.excl     0 : { KEEP(*(.stab.excl)); }
+    .stab.exclstr  0 : { KEEP(*(.stab.exclstr)); }
+    .stab.index    0 : { KEEP(*(.stab.index)); }
+    .stab.indexstr 0 : { KEEP(*(.stab.indexstr)); }
+    .comment       0 : { KEEP(*(.comment)); }
     /* DWARF debug sections.
        Symbols in the DWARF debugging sections are relative to the beginning
        of the section so we begin them at 0.  */
     /* DWARF 1 */
-    .debug          0 : { *(.debug) }
-    .line           0 : { *(.line) }
+    .debug          0 : { KEEP(*(.debug)); }
+    .line           0 : { KEEP(*(.line)); }
     /* GNU DWARF 1 extensions */
-    .debug_srcinfo  0 : { *(.debug_srcinfo) }
-    .debug_sfnames  0 : { *(.debug_sfnames) }
+    .debug_srcinfo  0 : { KEEP(*(.debug_srcinfo)); }
+    .debug_sfnames  0 : { KEEP(*(.debug_sfnames)); }
     /* DWARF 1.1 and DWARF 2 */
-    .debug_aranges  0 : { *(.debug_aranges) }
-    .debug_pubnames 0 : { *(.debug_pubnames) }
+    .debug_aranges  0 : { KEEP(*(.debug_aranges)); }
+    .debug_pubnames 0 : { KEEP(*(.debug_pubnames)); }
     /* DWARF 2 */
-    .debug_info     0 : { *(.debug_info) }
-    .debug_abbrev   0 : { *(.debug_abbrev) }
-    .debug_line     0 : { *(.debug_line) }
-    .debug_frame    0 : { *(.debug_frame) }
-    .debug_str      0 : { *(.debug_str) }
-    .debug_loc      0 : { *(.debug_loc) }
-    .debug_macinfo  0 : { *(.debug_macinfo) }
+    .debug_info     0 : { KEEP(*(.debug_info)); }
+    .debug_abbrev   0 : { KEEP(*(.debug_abbrev)); }
+    .debug_line     0 : { KEEP(*(.debug_line)); }
+    .debug_frame    0 : { KEEP(*(.debug_frame)); }
+    .debug_str      0 : { KEEP(*(.debug_str)); }
+    .debug_loc      0 : { KEEP(*(.debug_loc)); }
+    .debug_macinfo  0 : { KEEP(*(.debug_macinfo)); }
     /* SGI/MIPS DWARF 2 extensions */
-    .debug_weaknames 0 : { *(.debug_weaknames) }
-    .debug_funcnames 0 : { *(.debug_funcnames) }
-    .debug_typenames 0 : { *(.debug_typenames) }
-    .debug_varnames  0 : { *(.debug_varnames) }
+    .debug_weaknames 0 : { KEEP(*(.debug_weaknames)); }
+    .debug_funcnames 0 : { KEEP(*(.debug_funcnames)); }
+    .debug_typenames 0 : { KEEP(*(.debug_typenames)); }
+    .debug_varnames  0 : { KEEP(*(.debug_varnames)); }
 
     /* DWARF Other */
-    .debug_ranges  0 : { *(.debug_ranges) }
-    .debug_line_str 0 : { *(.debug_line_str) }
+    .debug_ranges  0 : { KEEP(*(.debug_ranges)); }
+    .debug_line_str 0 : { KEEP(*(.debug_line_str)); }
     /* Rust stuff */
 
     /DISCARD/ :

+ 1 - 1
src/kernel.rs

@@ -1,5 +1,6 @@
 pub mod block;
 pub mod console;
+pub mod constants;
 pub mod interrupt;
 pub mod mem;
 pub mod syscall;
@@ -9,7 +10,6 @@ pub mod user;
 pub mod vfs;
 
 mod chardev;
-mod constants;
 mod terminal;
 
 pub use chardev::{CharDevice, CharDeviceType, VirtualCharDevice};

+ 7 - 29
src/kernel/async/lock.cc

@@ -1,5 +1,4 @@
 #include <assert.h>
-#include <stdint.h>
 
 #include <kernel/async/lock.hpp>
 
@@ -49,33 +48,20 @@ static inline void _restore_interrupt_state(lock_context_t context) {
         :);
 }
 
-// TODO: mark as _per_cpu
-static inline preempt_count_t& _preempt_count() {
-    static preempt_count_t _preempt_count;
-    assert(_preempt_count >= 0);
-    return _preempt_count;
-}
+extern "C" void r_preempt_disable();
+extern "C" void r_preempt_enable();
+extern "C" unsigned long r_preempt_count();
 
 void preempt_disable() {
-    ++_preempt_count();
-    asm volatile("" : : : "memory");
+    r_preempt_disable();
 }
 
 void preempt_enable() {
-    asm volatile("" : : : "memory");
-    --_preempt_count();
-}
-
-extern "C" void r_preempt_disable() {
-    preempt_disable();
+    r_preempt_enable();
 }
 
-extern "C" void r_preempt_enable() {
-    preempt_enable();
-}
-
-preempt_count_t preempt_count() {
-    return _preempt_count();
+unsigned long preempt_count() {
+    return r_preempt_count();
 }
 
 void spin_lock(spinlock_t& lock) {
@@ -107,14 +93,6 @@ mutex::~mutex() {
     assert(m_lock == 0);
 }
 
-void mutex::lock() {
-    spin_lock(m_lock);
-}
-
-void mutex::unlock() {
-    spin_unlock(m_lock);
-}
-
 lock_context_t mutex::lock_irq() {
     return spin_lock_irqsave(m_lock);
 }

+ 3 - 0
src/kernel/console.rs

@@ -79,6 +79,9 @@ macro_rules! println_info {
 }
 
 macro_rules! println_fatal {
+    () => {
+        $crate::println!("[kernel:fatal] ")
+    };
     ($($arg:tt)*) => {
         $crate::println!("[kernel:fatal] {}", format_args!($($arg)*))
     };

+ 2 - 0
src/kernel/constants.rs

@@ -16,6 +16,8 @@ pub const SIG_SETMASK: u32 = 2;
 pub const CLOCK_REALTIME: u32 = 0;
 pub const CLOCK_MONOTONIC: u32 = 1;
 
+pub const ENOEXEC: u32 = 8;
+
 bitflags! {
     pub struct UserMmapFlags: u32 {
         const MAP_SHARED = 0x01;

+ 2 - 1
src/kernel/hw/pci.cc

@@ -87,10 +87,11 @@ int register_driver_r(uint16_t vendor, uint16_t device,
 
 namespace kernel::kinit {
 
-void init_pci() {
+extern "C" void init_pci() {
     using namespace hw::acpi;
     using namespace hw::pci;
 
+    assert(parse_acpi_tables() == 0);
     auto* mcfg = (MCFG*)get_table("MCFG");
     assert(mcfg);
 

+ 0 - 142
src/kernel/interrupt.cpp

@@ -1,142 +0,0 @@
-#include "kernel/async/lock.hpp"
-
-#include <list>
-#include <vector>
-
-#include <assert.h>
-#include <stdint.h>
-
-#include <types/types.h>
-
-#include <kernel/hw/port.hpp>
-#include <kernel/interrupt.hpp>
-#include <kernel/irq.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-
-#define KERNEL_INTERRUPT_GATE_TYPE (0x8e)
-#define USER_INTERRUPT_GATE_TYPE (0xee)
-
-constexpr kernel::hw::p8 port_pic1_command{0x20};
-constexpr kernel::hw::p8 port_pic1_data{0x21};
-constexpr kernel::hw::p8 port_pic2_command{0xa0};
-constexpr kernel::hw::p8 port_pic2_data{0xa1};
-
-struct IDT_entry {
-    uint16_t offset_low;
-    uint16_t segment;
-
-    uint8_t IST;
-    uint8_t attributes;
-
-    uint16_t offset_mid;
-    uint32_t offset_high;
-    uint32_t reserved;
-};
-
-static struct IDT_entry IDT[256];
-
-extern "C" uintptr_t ISR_START_ADDR;
-
-static inline void set_idt_entry(IDT_entry (&idt)[256], int n, uintptr_t offset, uint16_t selector,
-                                 uint8_t type) {
-    idt[n].offset_low = offset & 0xffff;
-    idt[n].segment = selector;
-    idt[n].IST = 0;
-    idt[n].attributes = type;
-    idt[n].offset_mid = (offset >> 16) & 0xffff;
-    idt[n].offset_high = (offset >> 32) & 0xffffffff;
-    idt[n].reserved = 0;
-}
-
-using kernel::irq::irq_handler_t;
-static std::vector<std::list<irq_handler_t>> s_irq_handlers;
-
-void kernel::kinit::init_interrupt() {
-    for (int i = 0; i < 0x30; ++i)
-        set_idt_entry(IDT, i, ISR_START_ADDR + 8 * i, 0x08, KERNEL_INTERRUPT_GATE_TYPE);
-    set_idt_entry(IDT, 0x80, ISR_START_ADDR + 8 * 0x80, 0x08, USER_INTERRUPT_GATE_TYPE);
-
-    uint64_t idt_descriptor[2];
-    idt_descriptor[0] = (sizeof(IDT_entry) * 256) << 48;
-    idt_descriptor[1] = (uintptr_t)IDT;
-
-    // initialize PIC
-    asm volatile("lidt (%0)" : : "r"((uintptr_t)idt_descriptor + 6) :);
-    s_irq_handlers.resize(16);
-
-    // TODO: move this to timer driver
-    kernel::irq::register_handler(0, []() {
-        kernel::hw::timer::inc_tick();
-        if (async::preempt_count() == 0)
-            schedule_now();
-    });
-
-    port_pic1_command = 0x11; // edge trigger mode
-    port_pic1_data = 0x20;    // start from int 0x20
-    port_pic1_data = 0x04;    // PIC1 is connected to IRQ2 (1 << 2)
-    port_pic1_data = 0x01;    // no buffer mode
-
-    port_pic2_command = 0x11; // edge trigger mode
-    port_pic2_data = 0x28;    // start from int 0x28
-    port_pic2_data = 0x02;    // connected to IRQ2
-    port_pic2_data = 0x01;    // no buffer mode
-
-    // allow all the interrupts
-    port_pic1_data = 0x00;
-    port_pic2_data = 0x00;
-}
-
-void kernel::irq::register_handler(int irqno, irq_handler_t handler) {
-    s_irq_handlers[irqno].emplace_back(std::move(handler));
-}
-
-static inline void fault_handler(interrupt_stack* context, mmx_registers*) {
-    switch (context->int_no) {
-        case 6:
-        case 8: {
-            assert(false);
-            if (!current_process->attr.system)
-                kill_current(SIGSEGV); // noreturn
-        } break;
-        case 13: {
-            if (!current_process->attr.system)
-                kill_current(SIGILL); // noreturn
-        } break;
-        case 14: {
-            kernel::mem::paging::handle_page_fault(context);
-            return;
-        } break;
-    }
-
-    // fault can not be resolved
-    freeze();
-}
-
-extern "C" void irq_handler_rust(int irqno);
-
-static inline void irq_handler(interrupt_stack* context, mmx_registers*) {
-    int irqno = context->int_no - 0x20;
-
-    constexpr uint8_t PIC_EOI = 0x20;
-
-    for (const auto& handler : s_irq_handlers[irqno])
-        handler();
-
-    irq_handler_rust(irqno);
-
-    port_pic1_command = PIC_EOI;
-    if (irqno >= 8)
-        port_pic2_command = PIC_EOI;
-}
-
-extern "C" void interrupt_handler(interrupt_stack* context, mmx_registers* mmxregs) {
-    if (context->int_no < 0x20) // interrupt is a fault
-        fault_handler(context, mmxregs);
-    else if (context->int_no == 0x80) // syscall by int 0x80
-        kernel::handle_syscall32(context->regs.rax, context, mmxregs);
-    else
-        irq_handler(context, mmxregs);
-}

+ 118 - 6
src/kernel/interrupt.rs

@@ -4,23 +4,111 @@ use alloc::vec::Vec;
 
 use lazy_static::lazy_static;
 
-use crate::bindings::root::EINVAL;
-use crate::Spin;
+use crate::bindings::root::{interrupt_stack, mmx_registers, EINVAL};
+use crate::{driver::Port8, prelude::*};
+
+use super::mem::handle_page_fault;
+use super::syscall::handle_syscall32;
+use super::task::{ProcessList, Signal};
+
+const PIC1_COMMAND: Port8 = Port8::new(0x20);
+const PIC1_DATA: Port8 = Port8::new(0x21);
+const PIC2_COMMAND: Port8 = Port8::new(0xA0);
+const PIC2_DATA: Port8 = Port8::new(0xA1);
+
+/// One entry of the interrupt descriptor table.
+///
+/// The handler address passed to `IDTEntry::new` is split across
+/// `offset_low` (bits 0-15), `offset_mid` (bits 16-31) and `offset_high`
+/// (bits 32-63).
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct IDTEntry {
+    offset_low: u16,
+    /// Segment selector; the table built in this file uses `0x08`.
+    selector: u16,
+
+    /// Always written as 0 by the constructors in this file.
+    interrupt_stack: u8,
+    /// Gate attribute byte; the table built in this file uses `0x8e` and `0xee`.
+    attributes: u8,
+
+    offset_mid: u16,
+    offset_high: u32,
+    reserved: u32,
+}
+
+extern "C" {
+    static ISR_START_ADDR: usize;
+}
 
 lazy_static! {
     static ref IRQ_HANDLERS: Spin<[Vec<Box<dyn Fn() + Send>>; 16]> =
         Spin::new(core::array::from_fn(|_| vec![]));
+    static ref IDT: [IDTEntry; 256] = core::array::from_fn(|idx| {
+        match idx {
+            0..0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0x8e),
+            0x80 => IDTEntry::new(unsafe { ISR_START_ADDR } + 8 * idx, 0x08, 0xee),
+            _ => IDTEntry::null(),
+        }
+    });
 }
 
-#[no_mangle]
-pub extern "C" fn irq_handler_rust(irqno: core::ffi::c_int) {
-    assert!(irqno >= 0 && irqno < 16);
+impl IDTEntry {
+    const fn new(offset: usize, selector: u16, attributes: u8) -> Self {
+        Self {
+            offset_low: offset as u16,
+            selector,
+            interrupt_stack: 0,
+            attributes,
+            offset_mid: (offset >> 16) as u16,
+            offset_high: (offset >> 32) as u32,
+            reserved: 0,
+        }
+    }
 
-    let handlers = IRQ_HANDLERS.lock();
+    const fn null() -> Self {
+        Self {
+            offset_low: 0,
+            selector: 0,
+            interrupt_stack: 0,
+            attributes: 0,
+            offset_mid: 0,
+            offset_high: 0,
+            reserved: 0,
+        }
+    }
+}
+
+fn irq_handler(irqno: usize) {
+    assert!(irqno < 16);
 
+    let handlers = IRQ_HANDLERS.lock();
     for handler in handlers[irqno as usize].iter() {
         handler();
     }
+
+    PIC1_COMMAND.write(0x20); // EOI
+    if irqno >= 8 {
+        PIC2_COMMAND.write(0x20); // EOI
+    }
+}
+
+/// Dispatch a CPU exception; `interrupt_handler` routes vectors below `0x20` here.
+///
+/// Vector 14 is forwarded to `handle_page_fault`. Vectors 13, 6 and 8 call
+/// `ProcessList::kill_current`, but only when the saved `ss` is zero. Every
+/// other case panics.
+fn fault_handler(int_stack: &mut interrupt_stack) {
+    match int_stack.int_no {
+        // Page Fault
+        14 => handle_page_fault(int_stack),
+        // General Protection Fault
+        // NOTE(review): the guard requires the saved `ss` to be zero; confirm
+        // which privilege level that is meant to identify.
+        13 if int_stack.ss == 0 => ProcessList::kill_current(Signal::SIGILL),
+        // Invalid Op or Double Fault
+        6 | 8 if int_stack.ss == 0 => ProcessList::kill_current(Signal::SIGSEGV),
+        _ => panic!("Unhandled fault: {}", int_stack.int_no),
+    }
+}
+
+/// Common interrupt entry point, exported unmangled with the C ABI.
+///
+/// Dispatches on the vector number stored in `int_stack.int_no`: vectors below
+/// `0x20` go to `fault_handler`, `0x80` goes to `handle_syscall32` with `rax`
+/// as the syscall number, and everything else is treated as IRQ number
+/// `int_no - 0x20`.
+///
+/// Both pointers are dereferenced without a null check.
+#[no_mangle]
+pub extern "C" fn interrupt_handler(int_stack: *mut interrupt_stack, mmxregs: *mut mmx_registers) {
+    let int_stack = unsafe { &mut *int_stack };
+    let mmxregs = unsafe { &mut *mmxregs };
+
+    match int_stack.int_no {
+        // Fault
+        0..0x20 => fault_handler(int_stack),
+        // Syscall
+        0x80 => handle_syscall32(int_stack.regs.rax as usize, int_stack, mmxregs),
+        // IRQ
+        // `irq_handler` asserts that the resulting number is below 16.
+        no => irq_handler(no as usize - 0x20),
+    }
 }
 
 pub fn register_irq_handler<F>(irqno: i32, handler: F) -> Result<(), u32>
@@ -34,3 +122,27 @@ where
     IRQ_HANDLERS.lock_irq()[irqno as usize].push(Box::new(handler));
     Ok(())
 }
+
+/// Load the interrupt descriptor table and program both interrupt controllers.
+///
+/// `lidt` receives the address of `IDT` and its limit, i.e. the table size in
+/// bytes minus one. The two controllers are then given vector offsets `0x20`
+/// and `0x28`, and both mask registers are cleared so that no IRQ is masked.
+///
+/// Always returns `Ok(())`.
+pub fn init() -> KResult<()> {
+    arch::x86_64::interrupt::lidt(
+        IDT.as_ptr() as usize,
+        (size_of::<IDTEntry>() * 256 - 1) as u16,
+    );
+
+    // Initialize PIC
+    PIC1_COMMAND.write(0x11); // edge trigger mode
+    PIC1_DATA.write(0x20); // IRQ 0-7 offset
+    PIC1_DATA.write(0x04); // cascade with slave PIC
+    PIC1_DATA.write(0x01); // no buffer mode
+
+    PIC2_COMMAND.write(0x11); // edge trigger mode
+    PIC2_DATA.write(0x28); // IRQ 8-15 offset
+    PIC2_DATA.write(0x02); // cascade with master PIC
+    PIC2_DATA.write(0x01); // no buffer mode
+
+    // Allow all IRQs
+    PIC1_DATA.write(0x0);
+    PIC2_DATA.write(0x0);
+
+    Ok(())
+}

+ 2 - 2
src/kernel/mem.rs

@@ -7,6 +7,6 @@ mod page_table;
 mod vrange;
 
 pub(self) use mm_area::MMArea;
-pub use mm_list::{MMList, Mapping, Permission, FileMapping};
-pub(self) use page_table::{PTEIterator, PageTable, PTE};
+pub use mm_list::{handle_page_fault, FileMapping, MMList, Mapping, PageFaultError, Permission};
+pub(self) use page_table::{PageTable, PTE};
 pub use vrange::{VAddr, VRange};

+ 2 - 2
src/kernel/mem/mm_area.rs

@@ -5,8 +5,8 @@ use super::{Mapping, Permission, VAddr, VRange};
 #[derive(Debug)]
 pub struct MMArea {
     range: UnsafeCell<VRange>,
-    mapping: Mapping,
-    permission: Permission,
+    pub(super) mapping: Mapping,
+    pub(super) permission: Permission,
 }
 
 impl Clone for MMArea {

+ 45 - 17
src/kernel/mem/mm_list.rs

@@ -1,3 +1,5 @@
+mod page_fault;
+
 use crate::prelude::*;
 
 use alloc::{collections::btree_set::BTreeSet, sync::Arc};
@@ -7,10 +9,15 @@ use crate::kernel::vfs::dentry::Dentry;
 
 use super::{MMArea, PageTable, VAddr, VRange};
 
+pub use page_fault::{handle_page_fault, PageFaultError};
+
 #[derive(Debug, Clone)]
 pub struct FileMapping {
     file: Arc<Dentry>,
+    /// Offset in the file, aligned to 4KB boundary.
     offset: usize,
+    /// Length of the mapping. Exceeding part will be zeroed.
+    length: usize,
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -37,17 +44,29 @@ struct MMListInner {
 pub struct MMList {
     /// # Safety
     /// This field might be used in IRQ context, so it should be locked with `lock_irq()`.
-    inner: Spin<MMListInner>,
+    inner: Mutex<MMListInner>,
 }
 
 impl FileMapping {
-    pub fn new(file: Arc<Dentry>, offset: usize) -> Self {
+    pub fn new(file: Arc<Dentry>, offset: usize, length: usize) -> Self {
         assert_eq!(offset & 0xfff, 0);
-        Self { file, offset }
+        Self {
+            file,
+            offset,
+            length,
+        }
     }
 
     pub fn offset(&self, offset: usize) -> Self {
-        Self::new(self.file.clone(), self.offset + offset)
+        if self.length <= offset {
+            Self::new(self.file.clone(), self.offset + self.length, 0)
+        } else {
+            Self::new(
+                self.file.clone(),
+                self.offset + offset,
+                self.length - offset,
+            )
+        }
     }
 }
 
@@ -66,7 +85,7 @@ impl MMListInner {
     }
 
     fn check_overlapping_addr(&self, addr: VAddr) -> bool {
-        addr.is_user() && self.overlapping_addr(addr).is_some()
+        addr.is_user() && self.overlapping_addr(addr).is_none()
     }
 
     fn overlapping_range(&self, range: VRange) -> impl DoubleEndedIterator<Item = &MMArea> + '_ {
@@ -74,7 +93,7 @@ impl MMListInner {
     }
 
     fn check_overlapping_range(&self, range: VRange) -> bool {
-        range.is_user() && self.overlapping_range(range).next().is_some()
+        range.is_user() && self.overlapping_range(range).next().is_none()
     }
 
     fn find_available(&self, hint: VAddr, len: usize) -> Option<VAddr> {
@@ -210,7 +229,7 @@ impl MMListInner {
 impl MMList {
     pub fn new() -> Arc<Self> {
         Arc::new(Self {
-            inner: Spin::new(MMListInner {
+            inner: Mutex::new(MMListInner {
                 areas: BTreeSet::new(),
                 page_table: PageTable::new(),
                 break_start: None,
@@ -226,7 +245,7 @@ impl MMList {
         let inner = self.inner.lock_irq();
 
         let list = Arc::new(Self {
-            inner: Spin::new(MMListInner {
+            inner: Mutex::new(MMListInner {
                 areas: inner.areas.clone(),
                 page_table: PageTable::new(),
                 break_start: inner.break_start,
@@ -239,8 +258,8 @@ impl MMList {
             let list_inner = list.inner.lock();
 
             for area in list_inner.areas.iter() {
-                let new_iter = list_inner.page_table.iter_user(area.range());
-                let old_iter = inner.page_table.iter_user(area.range());
+                let new_iter = list_inner.page_table.iter_user(area.range()).unwrap();
+                let old_iter = inner.page_table.iter_user(area.range()).unwrap();
 
                 for (new, old) in new_iter.zip(old_iter) {
                     new.setup_cow(old);
@@ -267,20 +286,18 @@ impl MMList {
         self.inner.lock_irq().unmap(start, len)
     }
 
-    pub fn mmap(
+    pub fn mmap_hint(
         &self,
-        at: VAddr,
+        hint: VAddr,
         len: usize,
         mapping: Mapping,
         permission: Permission,
-        fixed: bool,
     ) -> KResult<VAddr> {
         let mut inner = self.inner.lock_irq();
-        match inner.mmap(at, len, mapping.clone(), permission) {
-            Ok(()) => Ok(at),
-            Err(EEXIST) if fixed => Err(EEXIST),
+        match inner.mmap(hint, len, mapping.clone(), permission) {
+            Ok(()) => Ok(hint),
             Err(EEXIST) => {
-                let at = inner.find_available(at, len).ok_or(ENOMEM)?;
+                let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
                 inner.mmap(at, len, mapping, permission)?;
                 Ok(at)
             }
@@ -288,6 +305,17 @@ impl MMList {
         }
     }
 
+    /// Map `len` bytes at exactly `at`.
+    ///
+    /// Unlike `mmap_hint`, no other address is tried: whatever error the inner
+    /// `mmap` reports is returned unchanged.
+    ///
+    /// Returns `at` on success.
+    pub fn mmap_fixed(
+        &self,
+        at: VAddr,
+        len: usize,
+        mapping: Mapping,
+        permission: Permission,
+    ) -> KResult<VAddr> {
+        let mut inner = self.inner.lock_irq();
+        // `mapping` is owned and not used again, so it is moved rather than cloned.
+        inner.mmap(at, len, mapping, permission).map(|_| at)
+    }
+
     pub fn set_break(&self, pos: Option<VAddr>) -> VAddr {
         self.inner.lock_irq().set_break(pos)
     }

+ 202 - 0
src/kernel/mem/mm_list/page_fault.rs

@@ -0,0 +1,202 @@
+use bindings::kernel::mem::paging::pfn_to_page;
+use bindings::{PA_A, PA_ANON, PA_COW, PA_MMAP, PA_P, PA_RW};
+use bitflags::bitflags;
+
+use crate::bindings::root::interrupt_stack;
+use crate::kernel::mem::paging::{Page, PageBuffer};
+use crate::kernel::mem::phys::{CachedPP, PhysPtr};
+use crate::kernel::mem::{Mapping, VRange};
+use crate::kernel::task::{ProcessList, Signal, Thread};
+use crate::prelude::*;
+
+use super::{MMList, VAddr};
+
+bitflags! {
+    pub struct PageFaultError: u64 { // Flag view of the x86 page-fault error code taken from `interrupt_stack::error_code`.
+        const Present = 0x0001; // Bit 0: set when the faulting page was present (protection fault); clear for a non-present page.
+        const Write = 0x0002; // Bit 1: the faulting access was a write.
+        const User = 0x0004; // Bit 2: the access originated in user mode.
+        const ReservedSet = 0x0008; // Bit 3: a reserved bit was set in a paging-structure entry (per Intel SDM; not consulted in this file).
+        const InstructionFetch = 0x0010; // Bit 4: the fault was caused by an instruction fetch.
+        const ProtectionKey = 0x0020; // Bit 5: protection-key violation (per Intel SDM; not consulted in this file).
+        const SGX = 0x8000; // Bit 15: SGX-related fault (per Intel SDM; not consulted in this file).
+    }
+}
+
+#[repr(C)] // C layout: entries are read in place from the table between `FIX_START` and `FIX_END`.
+struct FixEntry {
+    start: u64, // First instruction address covered by this entry.
+    length: u64, // Size of the covered range; an `ip` matches when `start <= ip < start + length`.
+    jump_address: u64, // Value written to `v_rip` when the faulting `ip` falls inside the range.
+    op_type: u64, // Not consulted yet; see the TODO in `try_page_fault_fix`.
+}
+
+impl MMList {
+    /// Resolve a page fault at `addr` against this address space.
+    ///
+    /// `error` is the decoded fault error code. `int_stack` is the interrupted context; it is
+    /// only modified when the fault is redirected through `try_page_fault_fix`.
+    ///
+    /// # Return
+    /// `Ok(())` when the fault has been handled (or redirected by the fix table), or
+    /// `Err(signal)` carrying the signal the caller should deliver to the faulting process.
+    ///
+    /// Fatal user faults are reported through the return value instead of killing the process
+    /// from here, so that the `inner` lock guard is released before the process is terminated.
+    fn handle_page_fault(
+        &self,
+        int_stack: &mut interrupt_stack,
+        addr: VAddr,
+        error: PageFaultError,
+    ) -> Result<(), Signal> {
+        let inner = self.inner.lock();
+        let area = match inner.areas.get(&VRange::from(addr)) {
+            Some(area) => area,
+            None => {
+                if error.contains(PageFaultError::User) {
+                    // User access to an address that is not mapped at all: this is an invalid
+                    // memory reference, reported as SIGSEGV.
+                    return Err(Signal::SIGSEGV);
+                } else {
+                    try_page_fault_fix(int_stack, addr);
+                    return Ok(());
+                }
+            }
+        };
+
+        // User access to a present page caused the fault, check the area's access permission.
+        if error.contains(PageFaultError::User | PageFaultError::Present) {
+            let write_denied = error.contains(PageFaultError::Write) && !area.permission.write;
+            let execute_denied =
+                error.contains(PageFaultError::InstructionFetch) && !area.permission.execute;
+
+            // Let the caller kill the process: we must not terminate it while `inner` is locked.
+            if write_denied || execute_denied {
+                return Err(Signal::SIGSEGV);
+            }
+        }
+
+        let pte = inner
+            .page_table
+            .iter_user(VRange::new(addr.floor(), addr.floor() + 0x1000))
+            .unwrap()
+            .next()
+            .expect("If we can find the mapped area, we should be able to find the PTE");
+
+        let is_mapped = matches!(&area.mapping, Mapping::File(_));
+        if !is_mapped && !error.contains(PageFaultError::Present) {
+            try_page_fault_fix(int_stack, addr);
+            return Ok(());
+        }
+
+        let mut pfn = pte.pfn();
+        let mut attributes = pte.attributes();
+
+        // NOTE: Unary `!` binds tighter than `as`. The masks below are cast to `usize` *before*
+        //       being inverted so that the high attribute bits are preserved regardless of the
+        //       declared width of the binding constants.
+        if attributes & PA_COW as usize != 0 {
+            attributes &= !(PA_COW as usize);
+            if area.permission.write {
+                attributes |= PA_RW as usize;
+            } else {
+                attributes &= !(PA_RW as usize);
+            }
+
+            // TODO!!!: Change this.
+            let page = unsafe { pfn_to_page(pfn).as_mut().unwrap() };
+            if page.refcount == 1 {
+                // We are the only user of the page, just take it over.
+                pte.set_attributes(attributes);
+                return Ok(());
+            }
+
+            // The page is shared, give this mapping a private copy.
+            let new_page = Page::alloc_one();
+            if attributes & PA_ANON as usize != 0 {
+                new_page.zero();
+            } else {
+                new_page
+                    .as_cached()
+                    .as_mut_slice::<u8>(0x1000)
+                    .copy_from_slice(CachedPP::new(pfn).as_slice(0x1000));
+            }
+
+            attributes &= !((PA_A | PA_ANON) as usize);
+            page.refcount -= 1;
+
+            pfn = new_page.into_pfn();
+            pte.set(pfn, attributes);
+        }
+
+        // TODO: shared mapping
+        if attributes & PA_MMAP as usize != 0 {
+            attributes |= PA_P as usize;
+
+            if let Mapping::File(mapping) = &area.mapping {
+                let load_offset = addr.floor() - area.range().start();
+                if load_offset < mapping.length {
+                    // SAFETY: Since we are here, the `pfn` must refer to a valid buddy page.
+                    let page = unsafe { Page::from_pfn(pfn, 0) };
+                    let nread = mapping
+                        .file
+                        .read(
+                            &mut PageBuffer::new(page.clone()),
+                            mapping.offset + load_offset,
+                        )
+                        .map_err(|_| Signal::SIGBUS)?;
+
+                    // Zero whatever the file read did not cover.
+                    if nread < page.len() {
+                        page.as_cached().as_mut_slice::<u8>(0x1000)[nread..].fill(0);
+                    }
+
+                    // Zero the part of the page that lies beyond the end of the mapping.
+                    if mapping.length - load_offset < 0x1000 {
+                        let length_to_end = mapping.length - load_offset;
+                        page.as_cached().as_mut_slice::<u8>(0x1000)[length_to_end..].fill(0);
+                    }
+                }
+                // Otherwise, the page is kept zero emptied.
+
+                attributes &= !(PA_MMAP as usize);
+                pte.set_attributes(attributes);
+            } else {
+                panic!("Anonymous mapping should not be PA_MMAP");
+            }
+        }
+
+        Ok(())
+    }
+}
+
+extern "C" { // Bounds of the `FixEntry` table scanned by `try_page_fault_fix`.
+    static FIX_START: *const FixEntry; // Start of the table. NOTE(review): a pointer-typed extern static is loaded from the symbol's storage; confirm the linker script stores the table address there (the removed C++ code declared these symbols as arrays).
+    static FIX_END: *const FixEntry; // Exclusive end of the table; the entry count is derived from `FIX_END - FIX_START`.
+}
+
+/// Redirect a faulting instruction to its registered recovery address.
+///
+/// The interrupted instruction pointer is looked up in the fix table delimited by `FIX_START`
+/// and `FIX_END`. On a hit, `v_rip` is overwritten with the entry's `jump_address`.
+///
+/// Panics (through `kernel_page_fault_die`) if no entry covers the instruction pointer.
+fn try_page_fault_fix(int_stack: &mut interrupt_stack, addr: VAddr) {
+    // TODO: Use `op_type` to fix.
+    let faulting_ip = int_stack.v_rip as u64;
+
+    // SAFETY: `FIX_START` and `FIX_END` are defined in the linker script in `.rodata` section.
+    let fix_table = unsafe {
+        let table_bytes = FIX_END as usize - FIX_START as usize;
+        core::slice::from_raw_parts(FIX_START, table_bytes / size_of::<FixEntry>())
+    };
+
+    let hit = fix_table
+        .iter()
+        .find(|entry| faulting_ip >= entry.start && faulting_ip < entry.start + entry.length);
+
+    match hit {
+        Some(entry) => int_stack.v_rip = entry.jump_address as usize,
+        None => kernel_page_fault_die(addr, faulting_ip as usize),
+    }
+}
+
+/// Abort with a panic describing an unrecoverable kernel-mode fault at `vaddr`, raised by the
+/// instruction at `ip`. Never returns.
+fn kernel_page_fault_die(vaddr: VAddr, ip: usize) -> ! {
+    let VAddr(fault_address) = vaddr;
+
+    panic!(
+        "Invalid kernel mode memory access to {:#8x} while executing the instruction at {:#8x}",
+        fault_address, ip
+    )
+}
+
+/// Page fault entry point.
+///
+/// Decodes the error code from `int_stack`, reads the faulting address from CR2 and lets the
+/// current process's memory list resolve the fault. If that yields a signal, the current
+/// process is killed with it.
+pub fn handle_page_fault(int_stack: &mut interrupt_stack) {
+    let fault_flags = PageFaultError::from_bits_truncate(int_stack.error_code);
+    let fault_address = VAddr(arch::x86_64::vm::get_cr2());
+
+    // Keep `Thread::current()` as a temporary of this statement so that nothing obtained from
+    // it is still alive when `kill_current` is called below.
+    let outcome = Thread::current()
+        .process
+        .mm_list
+        .handle_page_fault(int_stack, fault_address, fault_flags);
+
+    match outcome {
+        Ok(()) => {}
+        Err(signal) => {
+            println_debug!("Page fault in user space at {:#x}", fault_address.0);
+            ProcessList::kill_current(signal)
+        }
+    }
+}

+ 53 - 39
src/kernel/mem/page_table.rs

@@ -1,3 +1,5 @@
+use lazy_static::lazy_static;
+
 use crate::prelude::*;
 
 use crate::bindings::root::{EINVAL, KERNEL_PML4};
@@ -9,8 +11,6 @@ use super::{
 };
 use super::{MMArea, Permission};
 
-const EMPTY_PAGE_PFN: usize = 0x8000;
-
 const PA_P: usize = 0x001;
 const PA_RW: usize = 0x002;
 const PA_US: usize = 0x004;
@@ -35,22 +35,30 @@ pub struct PageTable {
     page: Page,
 }
 
-pub struct PTEIterator<'lt, const Kernel: bool> {
+pub struct PTEIterator<'lt, const KERNEL: bool> {
     count: usize,
     i4: u16,
     i3: u16,
     i2: u16,
     i1: u16,
-    p4: Page,
-    p3: Page,
-    p2: Page,
-    p1: Page,
+    p4: CachedPP,
+    p3: CachedPP,
+    p2: CachedPP,
+    p1: CachedPP,
 
     start: VAddr,
     end: VAddr,
     _phantom: core::marker::PhantomData<&'lt ()>,
 }
 
+lazy_static! {
+    static ref EMPTY_PAGE: Page = { // Shared zero-filled page, created on first access; its pfn is installed into PTEs via `EMPTY_PAGE.clone().into_pfn()`.
+        let page = Page::alloc_one();
+        page.zero(); // The page must read as all zeroes.
+        page
+    };
+}
+
 impl PTE {
     pub fn is_user(&self) -> bool {
         self.0 & PA_US != 0
@@ -61,11 +69,11 @@ impl PTE {
     }
 
     pub fn pfn(&self) -> usize {
-        self.0 & !0xfff
+        self.0 & !PA_MASK
     }
 
     pub fn attributes(&self) -> usize {
-        self.0 & 0xfff
+        self.0 & PA_MASK
     }
 
     pub fn set(&mut self, pfn: usize, attributes: usize) {
@@ -80,7 +88,7 @@ impl PTE {
         self.set(self.pfn(), attributes)
     }
 
-    pub fn parse_page_table(&mut self, kernel: bool) -> Page {
+    fn parse_page_table(&mut self, kernel: bool) -> CachedPP {
         let attributes = if kernel {
             PA_P | PA_RW | PA_G
         } else {
@@ -88,19 +96,20 @@ impl PTE {
         };
 
         if self.is_present() {
-            Page::get(self.pfn(), 0)
+            CachedPP::new(self.pfn())
         } else {
             let page = Page::alloc_one();
+            let pp = page.as_cached();
             page.zero();
-            self.set(page.as_phys(), attributes);
 
-            page
+            self.set(page.into_pfn(), attributes);
+            pp
         }
     }
 
     pub fn setup_cow(&mut self, from: &mut Self) {
         self.set(
-            Page::get(from.pfn(), 0).into_pfn(),
+            unsafe { Page::from_pfn(from.pfn(), 0) }.into_pfn(),
             (from.attributes() & !(PA_RW | PA_A | PA_D)) | PA_COW,
         );
 
@@ -115,21 +124,22 @@ impl PTE {
     pub fn take(&mut self) -> Page {
         // SAFETY: Acquire the ownership of the page from the page table and then
         // clear the PTE so no one could be able to access the page from here later on.
-        let page = unsafe { Page::from_pfn(self.pfn(), 0) };
+        let page = unsafe { Page::take_pfn(self.pfn(), 0) };
         self.clear();
         page
     }
 }
 
-impl<const Kernel: bool> PTEIterator<'_, Kernel> {
-    fn new(pt: Page, start: VAddr, end: VAddr) -> KResult<Self> {
-        if start >= end {
+impl<'lt, const KERNEL: bool> PTEIterator<'lt, KERNEL> {
+    fn new(pt: &'lt Page, start: VAddr, end: VAddr) -> KResult<Self> {
+        if start > end {
             return Err(EINVAL);
         }
 
-        let p3 = pt.as_page_table()[Self::index(4, start)].parse_page_table(Kernel);
-        let p2 = pt.as_page_table()[Self::index(3, start)].parse_page_table(Kernel);
-        let p1 = pt.as_page_table()[Self::index(2, start)].parse_page_table(Kernel);
+        let p4 = pt.as_cached();
+        let p3 = p4.as_mut_slice::<PTE>(512)[Self::index(4, start)].parse_page_table(KERNEL);
+        let p2 = p3.as_mut_slice::<PTE>(512)[Self::index(3, start)].parse_page_table(KERNEL);
+        let p1 = p2.as_mut_slice::<PTE>(512)[Self::index(2, start)].parse_page_table(KERNEL);
 
         Ok(Self {
             count: (end.0 - start.0) >> 12,
@@ -137,7 +147,7 @@ impl<const Kernel: bool> PTEIterator<'_, Kernel> {
             i3: Self::index(3, start) as u16,
             i2: Self::index(2, start) as u16,
             i1: Self::index(1, start) as u16,
-            p4: pt.clone(),
+            p4,
             p3,
             p2,
             p1,
@@ -156,15 +166,17 @@ impl<const Kernel: bool> PTEIterator<'_, Kernel> {
     }
 }
 
-impl<'lt, const Kernel: bool> Iterator for PTEIterator<'lt, Kernel> {
+impl<'lt, const KERNEL: bool> Iterator for PTEIterator<'lt, KERNEL> {
     type Item = &'lt mut PTE;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.count == 0 {
+        if self.count != 0 {
+            self.count -= 1;
+        } else {
             return None;
         }
 
-        let retval = &mut self.p1.as_page_table()[self.i1 as usize];
+        let retval = &mut self.p1.as_mut_slice::<PTE>(512)[self.i1 as usize];
         self.i1 = (self.i1 + 1) % 512;
         if self.i1 == 0 {
             self.i2 = (self.i2 + 1) % 512;
@@ -176,11 +188,12 @@ impl<'lt, const Kernel: bool> Iterator for PTEIterator<'lt, Kernel> {
                         panic!("PTEIterator: out of range");
                     }
                 }
-                self.p3 = self.p4.as_page_table()[self.i4 as usize].parse_page_table(Kernel);
+                self.p3 =
+                    self.p4.as_mut_slice::<PTE>(512)[self.i4 as usize].parse_page_table(KERNEL);
             }
-            self.p2 = self.p3.as_page_table()[self.i3 as usize].parse_page_table(Kernel);
+            self.p2 = self.p3.as_mut_slice::<PTE>(512)[self.i3 as usize].parse_page_table(KERNEL);
         }
-        self.p1 = self.p2.as_page_table()[self.i2 as usize].parse_page_table(Kernel);
+        self.p1 = self.p2.as_mut_slice::<PTE>(512)[self.i2 as usize].parse_page_table(KERNEL);
         Some(retval)
     }
 }
@@ -193,19 +206,19 @@ impl PageTable {
         let kernel_space_page_table = CachedPP::new(KERNEL_PML4 as usize);
         unsafe {
             page.as_cached()
-                .as_ptr::<()>()
+                .as_ptr::<u8>()
                 .copy_from_nonoverlapping(kernel_space_page_table.as_ptr(), page.len())
         };
 
         Self { page }
     }
 
-    pub fn iter_user(&self, range: VRange) -> PTEIterator<'_, false> {
-        PTEIterator::new(self.page.clone(), range.start().floor(), range.end().ceil()).unwrap()
+    pub fn iter_user(&self, range: VRange) -> KResult<PTEIterator<'_, false>> {
+        PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
-    pub fn iter_kernel(&self, range: VRange) -> PTEIterator<'_, true> {
-        PTEIterator::new(self.page.clone(), range.start().floor(), range.end().ceil()).unwrap()
+    pub fn iter_kernel(&self, range: VRange) -> KResult<PTEIterator<'_, true>> {
+        PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
     pub fn switch(&self) {
@@ -215,7 +228,7 @@ impl PageTable {
     pub fn unmap(&self, area: &MMArea) {
         let range = area.range();
         let use_invlpg = range.len() / 4096 < 4;
-        let iter = self.iter_user(range);
+        let iter = self.iter_user(range).unwrap();
 
         if self.page.as_phys() != arch::vm::current_page_table() {
             for pte in iter {
@@ -248,8 +261,8 @@ impl PageTable {
             PA_US | PA_COW | PA_ANON | PA_MMAP | PA_NXE
         };
 
-        for pte in self.iter_user(range) {
-            pte.set(EMPTY_PAGE_PFN, attributes);
+        for pte in self.iter_user(range).unwrap() {
+            pte.set(EMPTY_PAGE.clone().into_pfn(), attributes);
         }
     }
 
@@ -262,15 +275,16 @@ impl PageTable {
             PA_P | PA_US | PA_COW | PA_ANON | PA_NXE
         };
 
-        for pte in self.iter_user(range) {
-            pte.set(EMPTY_PAGE_PFN, attributes);
+        for pte in self.iter_user(range).unwrap() {
+            pte.set(EMPTY_PAGE.clone().into_pfn(), attributes);
         }
     }
 }
 
 fn drop_page_table_recursive(pt: &Page, level: usize) {
     for pte in pt
-        .as_page_table()
+        .as_cached()
+        .as_mut_slice::<PTE>(512)
         .iter_mut()
         .filter(|pte| pte.is_present() && pte.is_user())
     {

+ 0 - 146
src/kernel/mem/paging.cc

@@ -5,10 +5,8 @@
 
 #include <kernel/async/lock.hpp>
 #include <kernel/log.hpp>
-#include <kernel/mem/mm_list.hpp>
 #include <kernel/mem/paging.hpp>
 #include <kernel/mem/slab.hpp>
-#include <kernel/mem/vm_area.hpp>
 #include <kernel/process.hpp>
 
 using namespace types::list;
@@ -16,11 +14,6 @@ using namespace types::list;
 using namespace kernel::async;
 using namespace kernel::mem::paging;
 
-static inline void __page_fault_die(uintptr_t vaddr) {
-    kmsgf("[kernel] kernel panic: invalid memory access to %p", vaddr);
-    freeze();
-}
-
 static struct zone_info {
     page* next;
     std::size_t count;
@@ -261,142 +254,3 @@ page* kernel::mem::paging::pfn_to_page(pfn_t pfn) {
 void kernel::mem::paging::increase_refcount(page* pg) {
     pg->refcount++;
 }
-
-struct fix_entry {
-    uint64_t start;
-    uint64_t length;
-    uint64_t jump_address;
-    uint64_t type;
-};
-
-extern "C" fix_entry FIX_START[], FIX_END[];
-bool page_fault_fix(interrupt_stack* int_stack) {
-    // TODO: type load
-
-    // type store
-    for (fix_entry* fix = FIX_START; fix < FIX_END; fix++) {
-        if (int_stack->v_rip >= fix->start && int_stack->v_rip < fix->start + fix->length) {
-            int_stack->v_rip = fix->jump_address;
-            return true;
-        }
-    }
-
-    return false;
-}
-
-void kernel::mem::paging::handle_page_fault(interrupt_stack* int_stack) {
-    using namespace kernel::mem;
-    using namespace paging;
-
-    auto err = int_stack->error_code;
-
-    uintptr_t vaddr;
-    asm volatile("mov %%cr2, %0" : "=g"(vaddr) : :);
-    auto& mms = current_process->mms;
-
-    auto* mm_area = mms.find(vaddr);
-    if (!mm_area) [[unlikely]] {
-        // user access to address that does not exist
-        if (err & PAGE_FAULT_U)
-            kill_current(SIGSEGV);
-
-        if (!page_fault_fix(int_stack)) {
-            __page_fault_die(vaddr);
-        } else {
-            return;
-        }
-    }
-
-    // user access to a present page caused the fault
-    // check access rights
-    if (err & PAGE_FAULT_U && err & PAGE_FAULT_P) {
-        // write to read only pages
-        if (err & PAGE_FAULT_W && !(mm_area->flags & MM_WRITE))
-            kill_current(SIGSEGV);
-
-        // execute from non-executable pages
-        if (err & PAGE_FAULT_I && !(mm_area->flags & MM_EXECUTE))
-            kill_current(SIGSEGV);
-    }
-
-    auto idx = idx_all(vaddr);
-
-    auto pe = mms.get_page_table()[std::get<1>(idx)];
-    assert(pe.attributes() & PA_P);
-    pe = pe.parse()[std::get<2>(idx)];
-    assert(pe.attributes() & PA_P);
-    pe = pe.parse()[std::get<3>(idx)];
-    assert(pe.attributes() & PA_P);
-    pe = pe.parse()[std::get<4>(idx)];
-
-    bool mmapped = mm_area->flags & MM_MAPPED;
-    assert(!mmapped || mm_area->mapped_file);
-
-    if (!(err & PAGE_FAULT_P) && !mmapped) [[unlikely]] {
-        if (!page_fault_fix(int_stack)) {
-            __page_fault_die(vaddr);
-        } else {
-            return;
-        }
-    }
-
-    pfn_t pfn = pe.pfn();
-    auto attr = pe.attributes();
-
-    page* pg = pfn_to_page(pfn);
-
-    if (attr & PA_COW) {
-        attr &= ~PA_COW;
-        if (mm_area->flags & MM_WRITE)
-            attr |= PA_RW;
-        else
-            attr &= ~PA_RW;
-
-        // if it is a dying page
-        // TODO: use atomic
-        if (pg->refcount == 1) {
-            pe.set(attr, pfn);
-            return;
-        }
-
-        // duplicate the page
-        page* new_page = alloc_page();
-        pfn_t new_pfn = page_to_pfn(new_page);
-        physaddr<void> new_page_addr{new_pfn};
-
-        if (attr & PA_ANON)
-            memset(new_page_addr, 0x00, 0x1000);
-        else
-            memcpy(new_page_addr, physaddr<void>{pfn}, 0x1000);
-
-        attr &= ~(PA_A | PA_ANON);
-        --pg->refcount;
-
-        pe.set(attr, new_pfn);
-        pfn = new_pfn;
-    }
-
-    if (attr & PA_MMAP) {
-        attr |= PA_P;
-
-        size_t offset = (vaddr & ~0xfff) - mm_area->start;
-        char* data = physaddr<char>{pfn};
-
-        int n = fs::fs_read(mm_area->mapped_file.get(), data, 4096, mm_area->file_offset + offset,
-                            4096);
-
-        if (n < 0) {
-            kill_current(SIGBUS);
-            return;
-        }
-
-        // TODO: send SIGBUS if offset is greater than real size
-        if (n != 4096)
-            memset(data + n, 0x00, 4096 - n);
-
-        // TODO: shared mapping
-        attr &= ~PA_MMAP;
-
-        pe.set(attr, pfn);
-    }
-}

+ 36 - 12
src/kernel/mem/paging.rs

@@ -4,11 +4,11 @@ use crate::bindings::root::kernel::mem::paging::{
     pfn_to_page as c_pfn_to_page, PAGE_BUDDY,
 };
 use crate::bindings::root::EFAULT;
+use crate::io::{Buffer, FillResult};
 use crate::kernel::mem::phys;
 use core::fmt;
 
 use super::phys::PhysPtr;
-use super::PTE;
 
 pub struct Page {
     page_ptr: *mut c_page,
@@ -28,12 +28,11 @@ impl Page {
         Self { page_ptr, order }
     }
 
-    /// Get `Page` from `pfn` without increasing the reference count.
+    /// Get `Page` from `pfn`, acquiring the ownership of the page. `refcount` is not increased.
     ///
     /// # Safety
-    ///
-    /// Caller must ensure that the `pfn` is no longer used or there will be a memory leak.
-    pub unsafe fn from_pfn(pfn: usize, order: u32) -> Self {
+    /// Caller must ensure that the pfn is no longer referenced by any other code.
+    pub unsafe fn take_pfn(pfn: usize, order: u32) -> Self {
         let page_ptr = unsafe { c_pfn_to_page(pfn) };
 
         // Only buddy pages can be used here.
@@ -49,12 +48,15 @@ impl Page {
     }
 
     /// Get `Page` from `pfn` and increase the reference count.
-    pub fn get(pfn: usize, order: u32) -> Self {
+    ///
+    /// # Safety
+    /// Caller must ensure that `pfn` refers to a valid physical frame number with `refcount` > 0.
+    pub unsafe fn from_pfn(pfn: usize, order: u32) -> Self {
         // SAFETY: `pfn` is a valid physical frame number with refcount > 0.
         unsafe { Self::increase_refcount(pfn) };
 
-        // SAFETY: `pfn` has increased refcount.
-        unsafe { Self::from_pfn(pfn, order) }
+        // SAFETY: `pfn` has an increased refcount.
+        unsafe { Self::take_pfn(pfn, order) }
     }
 
     /// Consumes the `Page` and returns the physical frame number without dropping the reference
@@ -89,10 +91,6 @@ impl Page {
         }
     }
 
-    pub fn as_page_table<'lt>(&self) -> &'lt mut [PTE; 512] {
-        self.as_cached().as_mut_slice(512).try_into().unwrap()
-    }
-
     /// # Safety
     /// Caller must ensure that the page is properly freed.
     pub unsafe fn increase_refcount(pfn: usize) {
@@ -198,6 +196,32 @@ impl core::fmt::Write for PageBuffer {
     }
 }
 
+impl Buffer for PageBuffer {
+    /// Total capacity: the length of the backing page.
+    fn total(&self) -> usize {
+        self.page.len()
+    }
+
+    /// Amount written so far, as reported by `self.len()`.
+    fn wrote(&self) -> usize {
+        self.len()
+    }
+
+    /// Copy as much of `data` as still fits into the buffer.
+    ///
+    /// # Return
+    /// - `FillResult::Full` if there was no room left before copying anything,
+    /// - `FillResult::Partial(n)` if only the first `n` bytes of `data` fit,
+    /// - `FillResult::Done(n)` if all `n` bytes of `data` were copied.
+    fn fill(&mut self, data: &[u8]) -> crate::KResult<crate::io::FillResult> {
+        let room = self.remaining();
+        if room == 0 {
+            return Ok(FillResult::Full);
+        }
+
+        let copied = data.len().min(room);
+        let (head, _) = data.split_at(copied);
+        self.available_as_slice()[..copied].copy_from_slice(head);
+        self.consume(copied);
+
+        Ok(if copied == data.len() {
+            FillResult::Done(copied)
+        } else {
+            FillResult::Partial(copied)
+        })
+    }
+}
+
 /// Copy data from a slice to a `Page`
 ///
 /// DONT USE THIS FUNCTION TO COPY DATA TO MMIO ADDRESSES

+ 1 - 90
src/kernel/process.cpp

@@ -1,95 +1,6 @@
-#include <assert.h>
-#include <bits/alltypes.h>
-#include <stdint.h>
-#include <sys/mount.h>
-#include <sys/wait.h>
-
-#include <types/allocator.hpp>
-#include <types/cplusplus.hpp>
-#include <types/elf.hpp>
 #include <types/types.h>
 
-#include <kernel/async/lock.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-extern "C" void late_init_rust(uintptr_t* out_sp, uintptr_t* out_ip);
-
-void NORETURN _kernel_init(kernel::mem::paging::pfn_t kernel_stack_pfn) {
-    kernel::mem::paging::free_pages(kernel_stack_pfn, 9);
-
-    uintptr_t sp, ip;
-    late_init_rust(&sp, &ip);
-
-    asm volatile("sti");
-
-    // ------------------------------------------
-    // interrupt enabled
-    // ------------------------------------------
-
-    int ds = 0x33, cs = 0x2b;
-
-    asm volatile(
-        "mov %0, %%rax\n"
-        "mov %%ax, %%ds\n"
-        "mov %%ax, %%es\n"
-        "mov %%ax, %%fs\n"
-        "mov %%ax, %%gs\n"
-
-        "push %%rax\n"
-        "push %2\n"
-        "push $0x200\n"
-        "push %1\n"
-        "push %3\n"
-
-        "iretq\n"
-        :
-        : "g"(ds), "g"(cs), "g"(sp), "g"(ip)
-        : "eax", "memory");
-
-    freeze();
-}
-
-void NORETURN init_scheduler(kernel::mem::paging::pfn_t kernel_stack_pfn) {
-    procs = new proclist;
-
-    asm volatile(
-        "mov %2, %%rdi\n"
-        "mov %0, %%rsp\n"
-        "sub $24, %%rsp\n"
-        "mov %=f, %%rbx\n"
-        "mov %%rbx, (%%rsp)\n"   // return address
-        "mov %%rbx, 16(%%rsp)\n" // previous frame return address
-        "xor %%rbx, %%rbx\n"
-        "mov %%rbx, 8(%%rsp)\n" // previous frame rbp
-        "mov %%rsp, %%rbp\n"    // current frame rbp
-
-        "push %1\n"
-
-        "mov $0x10, %%ax\n"
-        "mov %%ax, %%ss\n"
-        "mov %%ax, %%ds\n"
-        "mov %%ax, %%es\n"
-        "mov %%ax, %%fs\n"
-        "mov %%ax, %%gs\n"
-
-        "push $0x0\n"
-        "popf\n"
-
-        "ret\n"
-
-        "%=:\n"
-        "ud2"
-        :
-        : "a"(current_thread->kstack.sp), "c"(_kernel_init), "g"(kernel_stack_pfn)
-        : "memory");
-
-    freeze();
-}
-
+// TODO: remove this
 void NORETURN freeze(void) {
     for (;;)
         asm volatile("cli\n\thlt");

+ 2 - 11
src/kernel/syscall.rs

@@ -179,22 +179,13 @@ const SYSCALL_HANDLERS_SIZE: usize = 404;
 static mut SYSCALL_HANDLERS: [Option<SyscallHandler>; SYSCALL_HANDLERS_SIZE] =
     [const { None }; SYSCALL_HANDLERS_SIZE];
 
-#[no_mangle]
-pub extern "C" fn handle_syscall32(
-    no: usize,
-    int_stack: *mut interrupt_stack,
-    mmxregs: *mut mmx_registers,
-) {
+pub fn handle_syscall32(no: usize, int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) {
     // SAFETY: `SYSCALL_HANDLERS` are never modified after initialization.
     let syscall = unsafe { SYSCALL_HANDLERS.get(no) }.and_then(Option::as_ref);
 
-    // SAFETY: `int_stack` and `mmx_registers` are always valid.
-    let int_stack = unsafe { int_stack.as_mut().unwrap() };
-    let mmxregs = unsafe { mmxregs.as_mut().unwrap() };
-
     match syscall {
         None => {
-            println_warn!("Syscall {} isn't implemented", no);
+            println_warn!("Syscall {no}({no:#x}) isn't implemented");
             ProcessList::kill_current(Signal::SIGSYS);
         }
         Some(handler) => {

+ 15 - 4
src/kernel/syscall/mm.rs

@@ -52,8 +52,8 @@ fn do_mmap_pgoff(
 
     // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether
     //          `addr` is above user reachable memory.
-    mm_list
-        .mmap(
+    let addr = if flags.contains(UserMmapFlags::MAP_FIXED) {
+        mm_list.mmap_fixed(
             addr,
             len,
             Mapping::Anonymous,
@@ -61,9 +61,20 @@ fn do_mmap_pgoff(
                 write: prot.contains(UserMmapProtocol::PROT_WRITE),
                 execute: prot.contains(UserMmapProtocol::PROT_EXEC),
             },
-            flags.contains(UserMmapFlags::MAP_FIXED),
         )
-        .map(|addr| addr.0)
+    } else {
+        mm_list.mmap_hint(
+            addr,
+            len,
+            Mapping::Anonymous,
+            Permission {
+                write: prot.contains(UserMmapProtocol::PROT_WRITE),
+                execute: prot.contains(UserMmapProtocol::PROT_EXEC),
+            },
+        )
+    };
+
+    addr.map(|addr| addr.0)
 }
 
 fn do_munmap(addr: usize, len: usize) -> KResult<usize> {

+ 23 - 30
src/kernel/syscall/procops.rs

@@ -1,14 +1,14 @@
 use alloc::borrow::ToOwned;
 use alloc::ffi::CString;
-use alloc::sync::Arc;
-use bindings::types::elf::{elf32_load, elf32_load_data, ELF_LOAD_FAIL_NORETURN};
 use bindings::{interrupt_stack, mmx_registers, EINVAL, ENOENT, ENOTDIR, ESRCH};
 use bitflags::bitflags;
 
+use crate::elf::ParsedElf32;
 use crate::io::Buffer;
 use crate::kernel::constants::{PR_GET_NAME, PR_SET_NAME, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK};
+use crate::kernel::mem::VAddr;
 use crate::kernel::task::{
-    Process, ProcessList, Scheduler, Signal, SignalAction, Thread, UserDescriptor, WaitObject,
+    ProcessList, Scheduler, Signal, SignalAction, Thread, UserDescriptor, WaitObject,
 };
 use crate::kernel::user::dataflow::UserString;
 use crate::kernel::user::{UserPointer, UserPointerMut};
@@ -77,35 +77,28 @@ fn do_mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usiz
 }
 
 /// # Return
-/// `(ip, sp)`
-fn do_execve(exec: &[u8], argv: &[CString], envp: &[CString]) -> KResult<(usize, usize)> {
-    let context = FsContext::get_current();
-    let dentry = Dentry::open(&context, Path::new(exec)?, true)?;
+/// `(entry_ip, sp)`
+fn do_execve(exec: &[u8], argv: Vec<CString>, envp: Vec<CString>) -> KResult<(VAddr, VAddr)> {
+    let dentry = Dentry::open(&FsContext::get_current(), Path::new(exec)?, true)?;
     if !dentry.is_valid() {
         return Err(ENOENT);
     }
 
-    let argv_array = argv.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-    let envp_array = envp.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-
-    let mut load_data = elf32_load_data {
-        exec_dent: Arc::into_raw(dentry) as *mut _,
-        argv: argv_array.as_ptr(),
-        argv_count: argv_array.len(),
-        envp: envp_array.as_ptr(),
-        envp_count: envp_array.len(),
-        ip: 0,
-        sp: 0,
-    };
+    // TODO: When `execve` is called by one of the threads in a process, the other threads
+    //       should be terminated and `execve` is performed in the thread group leader.
+    let elf = ParsedElf32::parse(dentry.clone())?;
+    let result = elf.load(&Thread::current().process.mm_list, argv, envp);
+    if let Ok((ip, sp)) = result {
+        Thread::current().files.on_exec();
+        Thread::current().signal_list.clear_non_ignore();
+        Thread::current().set_name(dentry.name().clone());
 
-    Thread::current().files.on_exec();
-    match unsafe { elf32_load(&mut load_data) } {
-        ELF_LOAD_FAIL_NORETURN => ProcessList::kill_current(Signal::SIGSEGV),
-        0 => {
-            Thread::current().signal_list.clear_non_ignore();
-            Ok((load_data.ip, load_data.sp))
-        }
-        n => Err(-n as u32),
+        Ok((ip, sp))
+    } else {
+        drop(dentry);
+
+        // We can't hold any ownership when we call `kill_current`.
+        ProcessList::kill_current(Signal::SIGSEGV);
     }
 }
 
@@ -143,10 +136,10 @@ fn sys_execve(int_stack: &mut interrupt_stack, _mmxregs: &mut mmx_registers) ->
             envp = envp.offset(1)?;
         }
 
-        let (ip, sp) = do_execve(exec.as_cstr().to_bytes(), &argv_vec, &envp_vec)?;
+        let (ip, sp) = do_execve(exec.as_cstr().to_bytes(), argv_vec, envp_vec)?;
 
-        int_stack.v_rip = ip;
-        int_stack.rsp = sp;
+        int_stack.v_rip = ip.0;
+        int_stack.rsp = sp.0;
         Ok(())
     })() {
         Ok(_) => 0,

+ 2 - 2
src/kernel/task.rs

@@ -8,6 +8,6 @@ pub(self) use kstack::KernelStack;
 pub use scheduler::Scheduler;
 pub use signal::{Signal, SignalAction};
 pub use thread::{
-    Process, ProcessGroup, ProcessList, Session, Thread, ThreadState, UserDescriptor,
-    UserDescriptorFlags, WaitObject,
+    init_multitasking, Process, ProcessGroup, ProcessList, Session, Thread, ThreadState,
+    UserDescriptor, UserDescriptorFlags, WaitObject,
 };

+ 1 - 1
src/kernel/task/kstack.rs

@@ -68,7 +68,7 @@ impl<'lt> KernelStackWriter<'lt> {
 
         // SAFETY: `sp` is always valid.
         unsafe {
-            (self.sp as *mut usize).write(val);
+            (*self.sp as *mut usize).write(val);
         }
     }
 

+ 15 - 1
src/kernel/task/scheduler.rs

@@ -6,6 +6,7 @@ use alloc::{
     collections::vec_deque::VecDeque,
     sync::{Arc, Weak},
 };
+use lazy_static::lazy_static;
 
 use super::{Thread, ThreadState};
 
@@ -29,6 +30,12 @@ static mut IDLE_TASK: Option<Arc<Thread>> = None;
 /// TODO!!!: This should be per cpu in smp environment.
 static mut CURRENT: Option<Arc<Thread>> = None;
 
+lazy_static! {
+    static ref GLOBAL_SCHEDULER: Spin<Scheduler> = Spin::new(Scheduler { // The single scheduler instance handed out by `Scheduler::get`, created on first access.
+        ready: VecDeque::new(), // Starts with an empty `ready` queue.
+    });
+}
+
 impl Scheduler {
     /// `Scheduler` might be used in various places. Do not hold it for a long time.
     ///
@@ -38,7 +45,7 @@ impl Scheduler {
     ///
     /// Drop the lock before calling `schedule`.
     pub fn get() -> &'static Spin<Self> {
-        todo!()
+        &GLOBAL_SCHEDULER
     }
 
     pub fn current<'lt>() -> &'lt Arc<Thread> {
@@ -52,6 +59,13 @@ impl Scheduler {
     }
 
     pub(super) fn set_idle(thread: Arc<Thread>) {
+        thread.prepare_kernel_stack(|kstack| {
+            let mut writer = kstack.get_writer();
+            writer.flags = 0x200;
+            writer.entry = idle_task;
+            writer.finish();
+        });
+
         // TODO!!!: Set per cpu variable.
         unsafe { IDLE_TASK = Some(thread) };
     }

+ 7 - 8
src/kernel/task/signal.rs

@@ -4,12 +4,12 @@ use crate::{io::BufferFill, kernel::user::dataflow::UserBuffer, prelude::*};
 
 use alloc::collections::{binary_heap::BinaryHeap, btree_map::BTreeMap};
 use bindings::{
-    interrupt_stack, kill_current, mmx_registers, EFAULT, EINVAL, SA_RESTORER, SIGABRT, SIGBUS,
-    SIGCHLD, SIGCONT, SIGFPE, SIGILL, SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP,
-    SIGTTIN, SIGTTOU, SIGURG, SIGWINCH, SIGXCPU, SIGXFSZ,
+    interrupt_stack, mmx_registers, EFAULT, EINVAL, SA_RESTORER, SIGABRT, SIGBUS, SIGCHLD, SIGCONT,
+    SIGFPE, SIGILL, SIGKILL, SIGQUIT, SIGSEGV, SIGSTOP, SIGSYS, SIGTRAP, SIGTSTP, SIGTTIN, SIGTTOU,
+    SIGURG, SIGWINCH, SIGXCPU, SIGXFSZ,
 };
 
-use super::Thread;
+use super::{ProcessList, Thread};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Signal(u32);
@@ -385,11 +385,10 @@ impl SignalList {
 
 // TODO!!!: Should we use `uwake` or `iwake`?
 fn terminate_process(signal: Signal) -> ! {
-    unsafe { kill_current(signal.to_signum() as i32) };
+    ProcessList::kill_current(signal)
 }
 
+// TODO!!!!!!: Check exit status format.
 fn terminate_process_core_dump(signal: Signal) -> ! {
-    unsafe { kill_current(signal.to_signum() as i32 & 0x80) };
+    ProcessList::kill_current(signal)
 }
-
-fn schedule() {}

+ 46 - 55
src/kernel/task/thread.rs

@@ -24,6 +24,7 @@ use alloc::{
     sync::{Arc, Weak},
 };
 use bindings::{ECHILD, EINTR, EINVAL, EPERM, ESRCH};
+use lazy_static::lazy_static;
 
 use crate::kernel::vfs::filearray::FileArray;
 
@@ -221,7 +222,7 @@ pub struct ProcessList {
 
 impl Session {
     fn new(sid: u32, leader: Weak<Process>) -> Arc<Self> {
-        let session = Arc::new(Self {
+        Arc::new(Self {
             sid,
             leader,
             inner: Spin::new(SessionInner {
@@ -229,10 +230,7 @@ impl Session {
                 control_terminal: None,
                 groups: BTreeMap::new(),
             }),
-        });
-
-        ProcessList::get().add_session(&session);
-        session
+        })
     }
 
     fn add_member(&self, pgroup: &Arc<ProcessGroup>) {
@@ -270,15 +268,12 @@ impl Session {
 
 impl ProcessGroup {
     fn new_for_init(pgid: u32, leader: Weak<Process>, session: Weak<Session>) -> Arc<Self> {
-        let pgroup = Arc::new(Self {
+        Arc::new(Self {
             pgid,
             leader: leader.clone(),
             session,
             processes: Spin::new(BTreeMap::from([(pgid, leader)])),
-        });
-
-        ProcessList::get().add_pgroup(&pgroup);
-        pgroup
+        })
     }
 
     fn new(leader: &Arc<Process>, session: &Arc<Session>) -> Arc<Self> {
@@ -289,7 +284,6 @@ impl ProcessGroup {
             processes: Spin::new(BTreeMap::from([(leader.pid, Arc::downgrade(leader))])),
         });
 
-        ProcessList::get().add_pgroup(&pgroup);
         session.add_member(&pgroup);
         pgroup
     }
@@ -324,9 +318,38 @@ impl Drop for ProcessGroup {
     }
 }
 
+lazy_static! {
+    static ref GLOBAL_PROC_LIST: ProcessList = {
+        let init_process = Process::new_for_init(1, None);
+        let init_thread = Thread::new_for_init(b"[kernel kinit]".as_slice().into(), &init_process);
+        Scheduler::set_current(init_thread.clone());
+
+        let idle_process = Process::new_for_init(0, None);
+        let idle_thread =
+            Thread::new_for_init(b"[kernel idle#BS]".as_slice().into(), &idle_process);
+        Scheduler::set_idle(idle_thread.clone());
+
+        let init_session_weak = Arc::downgrade(&init_process.inner.lock().session);
+        let init_pgroup_weak = Arc::downgrade(&init_process.inner.lock().pgroup);
+
+        ProcessList {
+            sessions: Spin::new(BTreeMap::from([(1, init_session_weak)])),
+            pgroups: Spin::new(BTreeMap::from([(1, init_pgroup_weak)])),
+            threads: Spin::new(BTreeMap::from([
+                (1, init_thread.clone()),
+                (0, idle_thread.clone()),
+            ])),
+            processes: Spin::new(BTreeMap::from([
+                (1, Arc::downgrade(&init_process)),
+                (0, Arc::downgrade(&idle_process)),
+            ])),
+            init: init_process,
+        }
+    };
+}
 impl ProcessList {
     pub fn get() -> &'static Self {
-        todo!()
+        &GLOBAL_PROC_LIST
     }
 
     pub fn add_session(&self, session: &Arc<Session>) {
@@ -360,31 +383,6 @@ impl ProcessList {
         Scheduler::schedule_noreturn()
     }
 
-    fn new() -> Self {
-        let init_process = Process::new_for_init(1, None);
-        let init_thread = Thread::new_for_init(b"[kernel kinit]".as_slice().into(), &init_process);
-        Scheduler::set_current(init_thread.clone());
-
-        let idle_process = Process::new_for_init(0, None);
-        let idle_thread =
-            Thread::new_for_init(b"[kernel idle#BS]".as_slice().into(), &idle_process);
-        Scheduler::set_idle(idle_thread.clone());
-
-        Self {
-            sessions: Spin::new(BTreeMap::new()),
-            pgroups: Spin::new(BTreeMap::new()),
-            threads: Spin::new(BTreeMap::from([
-                (1, init_thread.clone()),
-                (0, idle_thread.clone()),
-            ])),
-            processes: Spin::new(BTreeMap::from([
-                (1, Arc::downgrade(&init_process)),
-                (0, Arc::downgrade(&idle_process)),
-            ])),
-            init: init_process,
-        }
-    }
-
     // TODO!!!!!!: Reconsider this
     fn remove(&self, tid: u32) {
         if let None = self.threads.lock().remove(&tid) {
@@ -557,7 +555,6 @@ impl Process {
             }
         });
 
-        ProcessList::get().add_process(&process);
         process.inner.lock().pgroup.add_member(&process);
         process
     }
@@ -635,8 +632,12 @@ impl Process {
             return Err(EPERM);
         }
         inner.session = Session::new(self.pid, Arc::downgrade(self));
+        ProcessList::get().add_session(&inner.session);
+
         inner.pgroup.remove_member(self.pid);
         inner.pgroup = ProcessGroup::new(self, &inner.session);
+        ProcessList::get().add_pgroup(&inner.pgroup);
+
         Ok(inner.pgroup.pgid)
     }
 
@@ -672,8 +673,11 @@ impl Process {
             }
 
             inner.session = Session::new(self.pid, Arc::downgrade(self));
+            ProcessList::get().add_session(&inner.session);
+
             inner.pgroup.remove_member(self.pid);
             inner.pgroup = ProcessGroup::new(self, &inner.session);
+            ProcessList::get().add_pgroup(&inner.pgroup);
         }
 
         Ok(())
@@ -772,7 +776,6 @@ impl Thread {
             }),
         });
 
-        ProcessList::get().add_thread(&thread);
         process.add_thread(&thread);
         thread
     }
@@ -961,22 +964,10 @@ impl Process {
     }
 }
 
-// TODO!!!!!!: impl this
-fn init_scheduler() {
-    let process_list = ProcessList::new();
+pub fn init_multitasking() {
+    // Lazy init
+    assert!(ProcessList::get().try_find_thread(1).is_some());
+
     Thread::current().load_interrupt_stack();
     Thread::current().process.mm_list.switch_page_table();
-
-    Scheduler::idle_task().prepare_kernel_stack(|kstack| {
-        let mut writer = kstack.get_writer();
-        writer.flags = 0x200;
-        writer.entry = idle_task;
-        writer.finish();
-    });
-}
-
-extern "C" fn idle_task() {
-    loop {
-        arch::task::halt();
-    }
 }

+ 1 - 65
src/kernel/terminal.rs

@@ -104,62 +104,10 @@ bitflags! {
 }
 
 /* c_cflag bit meaning */
-/* Common CBAUD rates */
-const B0: u32 = 0x00000000; /* hang up */
-const B50: u32 = 0x00000001;
-const B75: u32 = 0x00000002;
-const B110: u32 = 0x00000003;
-const B134: u32 = 0x00000004;
-const B150: u32 = 0x00000005;
-const B200: u32 = 0x00000006;
-const B300: u32 = 0x00000007;
-const B600: u32 = 0x00000008;
-const B1200: u32 = 0x00000009;
-const B1800: u32 = 0x0000000a;
-const B2400: u32 = 0x0000000b;
-const B4800: u32 = 0x0000000c;
-const B9600: u32 = 0x0000000d;
-const B19200: u32 = 0x0000000e;
 const B38400: u32 = 0x0000000f;
-const EXTA: u32 = B19200;
-const EXTB: u32 = B38400;
-
-const ADDRB: u32 = 0x20000000; /* address bit */
-const CMSPAR: u32 = 0x40000000; /* mark or space (stick) parity */
-const CRTSCTS: u32 = 0x80000000; /* flow control */
-
-const IBSHIFT: u32 = 16; /* Shift from CBAUD to CIBAUD */
-
-const CBAUD: u32 = 0x0000100f;
-const CSIZE: u32 = 0x00000030;
-const CS5: u32 = 0x00000000;
-const CS6: u32 = 0x00000010;
-const CS7: u32 = 0x00000020;
 const CS8: u32 = 0x00000030;
-const CSTOPB: u32 = 0x00000040;
 const CREAD: u32 = 0x00000080;
-const PARENB: u32 = 0x00000100;
-const PARODD: u32 = 0x00000200;
 const HUPCL: u32 = 0x00000400;
-const CLOCAL: u32 = 0x00000800;
-const CBAUDEX: u32 = 0x00001000;
-const BOTHER: u32 = 0x00001000;
-const B57600: u32 = 0x00001001;
-const B115200: u32 = 0x00001002;
-const B230400: u32 = 0x00001003;
-const B460800: u32 = 0x00001004;
-const B500000: u32 = 0x00001005;
-const B576000: u32 = 0x00001006;
-const B921600: u32 = 0x00001007;
-const B1000000: u32 = 0x00001008;
-const B1152000: u32 = 0x00001009;
-const B1500000: u32 = 0x0000100a;
-const B2000000: u32 = 0x0000100b;
-const B2500000: u32 = 0x0000100c;
-const B3000000: u32 = 0x0000100d;
-const B3500000: u32 = 0x0000100e;
-const B4000000: u32 = 0x0000100f;
-const CIBAUD: u32 = 0x100f0000; /* input baud rate */
 
 // line disciplines
 
@@ -260,10 +208,6 @@ macro_rules! CTRL {
 }
 
 impl Termios {
-    pub fn ctrl(c: u8) -> u8 {
-        c - 0x40
-    }
-
     pub fn veof(&self) -> u8 {
         self.cc[VEOF]
     }
@@ -288,14 +232,6 @@ impl Termios {
         self.cc[VSUSP]
     }
 
-    pub fn vstart(&self) -> u8 {
-        self.cc[VSTART]
-    }
-
-    pub fn vstop(&self) -> u8 {
-        self.cc[VSTOP]
-    }
-
     pub fn verase(&self) -> u8 {
         self.cc[VERASE]
     }
@@ -616,7 +552,7 @@ impl Terminal {
 
             if !inner.termio.icanon() {
                 let ch = inner.buffer.pop_front().unwrap();
-                buffer.fill(&[ch])?;
+                buffer.fill(&[ch])?.allow_partial();
                 break 'block;
             }
 

+ 5 - 2
src/kernel/timer.rs

@@ -1,8 +1,8 @@
 use core::sync::atomic::{AtomicUsize, Ordering};
 
-use crate::prelude::*;
+use crate::{prelude::*, sync::preempt};
 
-use super::interrupt::register_irq_handler;
+use super::{interrupt::register_irq_handler, task::Scheduler};
 
 static TICKS: AtomicUsize = AtomicUsize::new(0);
 
@@ -28,6 +28,9 @@ impl Ticks {
 
 fn timer_interrupt() {
     TICKS.fetch_add(1, Ordering::Relaxed);
+    if preempt::count() == 0 {
+        Scheduler::schedule();
+    }
 }
 
 pub fn ticks() -> Ticks {

+ 0 - 76
src/kernel/vfs.cpp

@@ -1,76 +0,0 @@
-#include <cstddef>
-
-#include <assert.h>
-#include <bits/alltypes.h>
-#include <errno.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/mount.h>
-#include <sys/types.h>
-
-#include <types/allocator.hpp>
-#include <types/path.hpp>
-
-#include <kernel/log.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-static fs::chrdev_ops** chrdevs[256];
-
-int fs::register_char_device(dev_t node, const fs::chrdev_ops& ops) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!chrdevs[major])
-        chrdevs[major] = new chrdev_ops* [256] {};
-
-    if (chrdevs[major][minor])
-        return -EEXIST;
-
-    chrdevs[major][minor] = new chrdev_ops{ops};
-    return 0;
-}
-
-ssize_t fs::char_device_read(dev_t node, char* buf, size_t buf_size, size_t n) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!chrdevs[major] || !chrdevs[major][minor])
-        return -EINVAL;
-
-    auto& read = chrdevs[major][minor]->read;
-    if (!read)
-        return -EINVAL;
-
-    return read(buf, buf_size, n);
-}
-
-ssize_t fs::char_device_write(dev_t node, const char* buf, size_t n) {
-    int major = NODE_MAJOR(node);
-    int minor = NODE_MINOR(node);
-
-    if (!chrdevs[major] || !chrdevs[major][minor])
-        return -EINVAL;
-
-    auto& write = chrdevs[major][minor]->write;
-    if (!write)
-        return -EINVAL;
-
-    return write(buf, n);
-}
-
-extern "C" void r_dput(struct dentry* dentry);
-extern "C" struct dentry* r_dget(struct dentry* dentry);
-
-void fs::dentry_deleter::operator()(struct dentry* dentry) const {
-    if (dentry)
-        r_dput(dentry);
-}
-
-fs::dentry_pointer fs::d_get(const dentry_pointer& dp) {
-    if (!dp)
-        return nullptr;
-
-    return dentry_pointer{r_dget(dp.get())};
-}

+ 7 - 80
src/kernel/vfs/dentry.rs

@@ -9,14 +9,16 @@ use core::{
 use crate::{
     hash::KernelHasher,
     io::{Buffer, ByteBuffer},
-    kernel::block::BlockDevice,
+    kernel::{block::BlockDevice, CharDevice},
     path::{Path, PathComponent},
     prelude::*,
     rcu::{RCUNode, RCUPointer},
 };
 
 use alloc::sync::Arc;
-use bindings::{statx, EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, ERANGE, O_CREAT, O_EXCL};
+use bindings::{
+    statx, EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, EPERM, ERANGE, O_CREAT, O_EXCL,
+};
 
 use super::{
     inode::{Ino, Inode, Mode, WriteOffset},
@@ -178,13 +180,6 @@ impl Dentry {
             .map(|data| data.inode.clone())
     }
 
-    /// This function is used to get the **borrowed** dentry from a raw pointer
-    pub fn from_raw(raw: &*const Self) -> BorrowedArc<Self> {
-        assert!(!raw.is_null());
-
-        BorrowedArc::new(raw)
-    }
-
     pub fn is_directory(&self) -> bool {
         let data = self.data.load();
         data.as_ref()
@@ -359,27 +354,8 @@ impl Dentry {
                 Ok(device.read_some(offset, buffer)?.allow_partial())
             }
             mode if s_ischr(mode) => {
-                let devid = inode.devid()?;
-
-                // TODO!!!!!: change this
-                let mut temporary_buffer = [0u8; 256];
-
-                let ret = unsafe {
-                    bindings::fs::char_device_read(
-                        devid,
-                        temporary_buffer.as_mut_ptr() as *mut _,
-                        temporary_buffer.len(),
-                        temporary_buffer.len(),
-                    )
-                };
-
-                if ret < 0 {
-                    Err(-ret as u32)
-                } else {
-                    Ok(buffer
-                        .fill(&temporary_buffer[..ret as usize])?
-                        .allow_partial())
-                }
+                let device = CharDevice::get(inode.devid()?).ok_or(EPERM)?;
+                device.read(buffer)
             }
             _ => Err(EINVAL),
         }
@@ -392,23 +368,7 @@ impl Dentry {
             mode if s_isdir(mode) => Err(EISDIR),
             mode if s_isreg(mode) => inode.write(buffer, offset),
             mode if s_isblk(mode) => Err(EINVAL), // TODO
-            mode if s_ischr(mode) => {
-                let devid = inode.devid()?;
-
-                let ret = unsafe {
-                    bindings::fs::char_device_write(
-                        devid,
-                        buffer.as_ptr() as *const _,
-                        buffer.len(),
-                    )
-                };
-
-                if ret < 0 {
-                    Err(-ret as u32)
-                } else {
-                    Ok(ret as usize)
-                }
-            }
+            mode if s_ischr(mode) => CharDevice::get(inode.devid()?).ok_or(EPERM)?.write(buffer),
             _ => Err(EINVAL),
         }
     }
@@ -464,36 +424,3 @@ impl Dentry {
         }
     }
 }
-
-#[no_mangle]
-pub extern "C" fn r_dget(dentry: *const Dentry) -> *const Dentry {
-    debug_assert!(!dentry.is_null());
-
-    unsafe { Arc::increment_strong_count(dentry) };
-    dentry
-}
-
-#[no_mangle]
-pub extern "C" fn r_dput(dentry: *const Dentry) {
-    debug_assert!(!dentry.is_null());
-
-    unsafe { Arc::from_raw(dentry) };
-}
-
-#[no_mangle]
-pub extern "C" fn r_dentry_is_directory(dentry: *const Dentry) -> bool {
-    let dentry = Dentry::from_raw(&dentry);
-
-    dentry
-        .data
-        .load()
-        .as_ref()
-        .map_or(false, |data| data.flags & D_DIRECTORY != 0)
-}
-
-#[no_mangle]
-pub extern "C" fn r_dentry_is_invalid(dentry: *const Dentry) -> bool {
-    let dentry = Dentry::from_raw(&dentry);
-
-    dentry.data.load().is_none()
-}

+ 0 - 22
src/kernel/vfs/ffi.rs

@@ -1,22 +0,0 @@
-use crate::io::RawBuffer;
-
-use super::{dentry::Dentry, inode::Inode};
-
-#[no_mangle]
-pub extern "C" fn fs_read(
-    file: *const Dentry, // borrowed
-    buf: *mut u8,
-    bufsize: usize,
-    offset: usize,
-    n: usize,
-) -> isize {
-    let file = Dentry::from_raw(&file);
-
-    let bufsize = bufsize.min(n);
-    let mut buffer = RawBuffer::new_from_raw(buf, bufsize);
-
-    match file.read(&mut buffer, offset) {
-        Ok(n) => n as isize,
-        Err(e) => -(e as isize),
-    }
-}

+ 0 - 30
src/kernel/vfs/filearray.rs

@@ -54,36 +54,6 @@ impl OpenFile {
     }
 }
 
-#[no_mangle]
-pub extern "C" fn r_filearray_new_for_init() -> *const FileArray {
-    Arc::into_raw(Arc::new(FileArray {
-        inner: Spin::new(FileArrayInner {
-            files: BTreeMap::new(),
-            fd_min_avail: 0,
-        }),
-    }))
-}
-
-#[no_mangle]
-pub extern "C" fn r_filearray_new_shared(other: *const FileArray) -> *const FileArray {
-    let other = BorrowedArc::from_raw(other);
-
-    Arc::into_raw(FileArray::new_shared(&other))
-}
-
-#[no_mangle]
-pub extern "C" fn r_filearray_new_cloned(other: *const FileArray) -> *const FileArray {
-    let other = BorrowedArc::from_raw(other);
-
-    Arc::into_raw(FileArray::new_cloned(&other))
-}
-
-#[no_mangle]
-pub extern "C" fn r_filearray_drop(other: *const FileArray) {
-    // SAFETY: `other` is a valid pointer from `Arc::into_raw()`.
-    unsafe { Arc::from_raw(other) };
-}
-
 impl FileArray {
     pub fn get_current<'lt>() -> &'lt Arc<Self> {
         &Thread::current().files

+ 0 - 1
src/kernel/vfs/mod.rs

@@ -8,7 +8,6 @@ use inode::Mode;
 use super::task::Thread;
 
 pub mod dentry;
-pub mod ffi;
 pub mod file;
 pub mod filearray;
 pub mod inode;

+ 10 - 28
src/kinit.cpp

@@ -1,4 +1,3 @@
-#include <assert.h>
 #include <stdint.h>
 #include <sys/utsname.h>
 
@@ -7,7 +6,6 @@
 
 #include <kernel/hw/acpi.hpp>
 #include <kernel/hw/pci.hpp>
-#include <kernel/hw/timer.hpp>
 #include <kernel/interrupt.hpp>
 #include <kernel/log.hpp>
 #include <kernel/mem/paging.hpp>
@@ -48,23 +46,6 @@ static inline void enable_sse() {
             : "rax");
 }
 
-void NORETURN real_kernel_init(mem::paging::pfn_t kernel_stack_pfn) {
-    // call global constructors
-    // NOTE: the initializer of global objects MUST NOT contain
-    // all kinds of memory allocations
-    for (auto* ctor = &start_ctors; ctor != &end_ctors; ++ctor)
-        (*ctor)();
-
-    init_interrupt();
-    hw::timer::init_pit();
-
-    hw::acpi::parse_acpi_tables();
-
-    init_pci();
-
-    init_scheduler(kernel_stack_pfn);
-}
-
 static inline void setup_early_kernel_page_table() {
     using namespace kernel::mem::paging;
 
@@ -81,12 +62,9 @@ static inline void setup_early_kernel_page_table() {
 
     // clear kernel bss
     memset((void*)BSS_ADDR, 0x00, BSS_LENGTH);
-
-    // clear empty page
-    memset(mem::physaddr<void>{(uintptr_t)EMPTY_PAGE_PFN}, 0x00, 0x1000);
 }
 
-extern "C" uintptr_t KIMAGE_PAGES_VALUE;
+extern "C" char KIMAGE_PAGES[];
 
 static inline void setup_buddy(uintptr_t addr_max) {
     using namespace kernel::mem;
@@ -97,6 +75,7 @@ static inline void setup_buddy(uintptr_t addr_max) {
     addr_max >>= 12;
     int count = (addr_max * sizeof(page) + 0x200000 - 1) / 0x200000;
 
+    auto KIMAGE_PAGES_VALUE = (size_t)KIMAGE_PAGES;
     pfn_t real_start_pfn = KERNEL_IMAGE_PADDR + KIMAGE_PAGES_VALUE * 0x1000;
     pfn_t aligned_start_pfn = real_start_pfn + 0x200000 - 1;
     aligned_start_pfn &= ~0x1fffff;
@@ -155,7 +134,7 @@ static inline void save_memory_info(bootloader_data* data) {
            sizeof(kernel::mem::info::e820_entries));
 }
 
-void setup_gdt() {
+static inline void setup_gdt() {
     // user code
     mem::gdt[3] = 0x0020'fa00'0000'0000;
     // user data
@@ -193,6 +172,8 @@ void setup_gdt() {
         : "ax", "memory");
 }
 
+extern "C" void rust_kinit(uintptr_t early_kstack_vaddr);
+
 extern "C" void NORETURN kernel_init(bootloader_data* data) {
     enable_sse();
 
@@ -217,12 +198,13 @@ extern "C" void NORETURN kernel_init(bootloader_data* data) {
 
     asm volatile(
         "mov %1, %%rdi\n\t"
-        "mov %2, %%rsp\n\t"
+        "lea -8(%2), %%rsp\n\t"
         "xor %%rbp, %%rbp\n\t"
-        "call *%0\n\t"
+        "mov %%rbp, (%%rsp)\n\t" // Clear previous frame pointer
+        "jmp *%0\n\t"
         :
-        : "r"(real_kernel_init), "g"(kernel_stack_pfn), "g"(kernel_stack_ptr)
-        :);
+        : "r"(rust_kinit), "g"(kernel_stack_pfn), "r"(kernel_stack_ptr)
+        : "memory");
 
     freeze();
 }

+ 129 - 57
src/lib.rs

@@ -11,6 +11,7 @@ extern crate alloc;
 mod bindings;
 
 mod driver;
+mod elf;
 mod fs;
 mod hash;
 mod io;
@@ -21,9 +22,14 @@ mod prelude;
 mod rcu;
 mod sync;
 
-use alloc::{ffi::CString, sync::Arc};
-use bindings::root::types::elf::{elf32_load, elf32_load_data};
+use alloc::ffi::CString;
+use elf::ParsedElf32;
 use kernel::{
+    mem::{
+        paging::Page,
+        phys::{CachedPP, PhysPtr as _},
+    },
+    task::{init_multitasking, Thread},
     vfs::{
         dentry::Dentry,
         mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY},
@@ -36,16 +42,32 @@ use prelude::*;
 
 #[panic_handler]
 fn panic(info: &core::panic::PanicInfo) -> ! {
-    println_fatal!("panicked at {:?}\n\t\t{}", info.location(), info.message());
+    if let Some(location) = info.location() {
+        println_fatal!(
+            "panicked at {}:{}:{}",
+            location.file(),
+            location.line(),
+            location.column()
+        );
+    } else {
+        println_fatal!("panicked at <UNKNOWN>");
+    }
+    println_fatal!();
+    println_fatal!("{}", info.message());
+
     arch::task::freeze()
 }
 
 extern "C" {
     fn _do_allocate(size: usize) -> *mut core::ffi::c_void;
     fn _do_deallocate(ptr: *mut core::ffi::c_void, size: core::ffi::c_size_t) -> i32;
+    fn init_pci();
 }
 
-use core::alloc::{GlobalAlloc, Layout};
+use core::{
+    alloc::{GlobalAlloc, Layout},
+    arch::{asm, global_asm},
+};
 
 struct Allocator {}
 unsafe impl GlobalAlloc for Allocator {
@@ -70,8 +92,56 @@ unsafe impl GlobalAlloc for Allocator {
 #[global_allocator]
 static ALLOCATOR: Allocator = Allocator {};
 
+global_asm!(
+    r"
+    .globl to_init_process
+    to_init_process:
+        push %rbp
+        mov %rbx, %rdi
+        jmp {}
+    ",
+    sym init_process,
+    options(att_syntax)
+);
+
+extern "C" {
+    fn to_init_process();
+}
+
 #[no_mangle]
-pub extern "C" fn late_init_rust(out_sp: *mut usize, out_ip: *mut usize) {
+pub extern "C" fn rust_kinit(early_kstack_pfn: usize) -> ! {
+    // We don't call global constructors.
+    // Rust doesn't need that, and we're not going to use global variables in C++.
+
+    kernel::interrupt::init().unwrap();
+
+    // TODO: Move this to rust.
+    unsafe { init_pci() };
+
+    kernel::vfs::mount::init_vfs().unwrap();
+
+    // We need root dentry to be present in constructor of `FsContext`.
+    // So call `init_vfs` first, then `init_multitasking`.
+    init_multitasking();
+    Thread::current().prepare_kernel_stack(|kstack| {
+        let mut writer = kstack.get_writer();
+        writer.entry = to_init_process;
+        writer.flags = 0x200;
+        writer.rbp = 0;
+        writer.rbx = early_kstack_pfn; // `to_init_process` arg
+        writer.finish();
+    });
+
+    arch::task::context_switch_light(
+        CachedPP::new(early_kstack_pfn).as_ptr(), // We will never come back
+        unsafe { Thread::current().get_sp_ptr() },
+    );
+    arch::task::freeze()
+}
+
+extern "C" fn init_process(early_kstack_pfn: usize) {
+    unsafe { Page::take_pfn(early_kstack_pfn, 9) };
+
     kernel::timer::init().unwrap();
 
     // Use the PIT timer for now.
@@ -83,66 +153,68 @@ pub extern "C" fn late_init_rust(out_sp: *mut usize, out_ip: *mut usize) {
     // We might want the serial initialized as soon as possible.
     driver::serial::init().unwrap();
 
-    kernel::vfs::mount::init_vfs().unwrap();
-
     driver::e1000e::register_e1000e_driver();
     driver::ahci::register_ahci_driver();
 
     fs::procfs::init();
     fs::fat32::init();
 
-    // mount fat32 /mnt directory
-    let fs_context = FsContext::get_current();
-    let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
-
-    mnt_dir.mkdir(0o755).unwrap();
-
-    do_mount(
-        &mnt_dir,
-        "/dev/sda",
-        "/mnt",
-        "fat32",
-        MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID,
-    )
-    .unwrap();
-
-    let init = Dentry::open(&fs_context, Path::new(b"/mnt/busybox").unwrap(), true)
-        .expect("kernel panic: init not found!");
-
-    let argv = vec![
-        CString::new("/mnt/busybox").unwrap(),
-        CString::new("sh").unwrap(),
-        CString::new("/mnt/initsh").unwrap(),
-    ];
-
-    let envp = vec![
-        CString::new("LANG=C").unwrap(),
-        CString::new("HOME=/root").unwrap(),
-        CString::new("PATH=/mnt").unwrap(),
-        CString::new("PWD=/").unwrap(),
-    ];
-
-    let argv_array = argv.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-    let envp_array = envp.iter().map(|x| x.as_ptr()).collect::<Vec<_>>();
-
-    // load init
-    let mut load_data = elf32_load_data {
-        exec_dent: Arc::into_raw(init) as *mut _,
-        argv: argv_array.as_ptr(),
-        argv_count: argv_array.len(),
-        envp: envp_array.as_ptr(),
-        envp_count: envp_array.len(),
-        ip: 0,
-        sp: 0,
+    let (ip, sp) = {
+        // mount fat32 /mnt directory
+        let fs_context = FsContext::get_current();
+        let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
+
+        mnt_dir.mkdir(0o755).unwrap();
+
+        do_mount(
+            &mnt_dir,
+            "/dev/sda",
+            "/mnt",
+            "fat32",
+            MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOSUID,
+        )
+        .unwrap();
+
+        let init = Dentry::open(&fs_context, Path::new(b"/mnt/busybox").unwrap(), true)
+            .expect("busybox should be present in /mnt");
+
+        let argv = vec![
+            CString::new("/mnt/busybox").unwrap(),
+            CString::new("sh").unwrap(),
+            CString::new("/mnt/initsh").unwrap(),
+        ];
+
+        let envp = vec![
+            CString::new("LANG=C").unwrap(),
+            CString::new("HOME=/root").unwrap(),
+            CString::new("PATH=/mnt").unwrap(),
+            CString::new("PWD=/").unwrap(),
+        ];
+
+        let elf = ParsedElf32::parse(init.clone()).unwrap();
+        elf.load(&Thread::current().process.mm_list, argv, envp)
+            .unwrap()
     };
 
-    let result = unsafe { elf32_load(&mut load_data) };
-    if result != 0 {
-        println_fatal!("Failed to load init: {}", result);
-    }
-
     unsafe {
-        *out_sp = load_data.sp;
-        *out_ip = load_data.ip;
+        asm!(
+            "mov %ax, %fs",
+            "mov %ax, %gs",
+            "mov ${ds}, %rax",
+            "mov %ax, %ds",
+            "mov %ax, %es",
+            "push ${ds}",
+            "push {sp}",
+            "push $0x200",
+            "push ${cs}",
+            "push {ip}",
+            "iretq",
+            ds = const 0x33,
+            cs = const 0x2b,
+            in("rax") 0,
+            ip = in(reg) ip.0,
+            sp = in(reg) sp.0,
+            options(att_syntax, noreturn),
+        );
     }
 }

+ 34 - 28
src/sync.rs

@@ -5,24 +5,44 @@ pub mod spin;
 pub mod strategy;
 
 pub mod preempt {
-    use core::sync::atomic::{compiler_fence, Ordering};
+    use core::sync::atomic::{compiler_fence, AtomicUsize, Ordering};
 
     /// TODO: This should be per cpu.
-    static mut PREEMPT_COUNT: usize = 0;
+    static PREEMPT_COUNT: AtomicUsize = AtomicUsize::new(0);
 
     #[inline(always)]
     pub fn disable() {
-        unsafe { PREEMPT_COUNT += 1 };
+        PREEMPT_COUNT.fetch_add(1, Ordering::Relaxed);
         compiler_fence(Ordering::SeqCst);
     }
 
     #[inline(always)]
     pub fn enable() {
         compiler_fence(Ordering::SeqCst);
-        unsafe { PREEMPT_COUNT -= 1 };
+        PREEMPT_COUNT.fetch_sub(1, Ordering::Relaxed);
+    }
+
+    #[inline(always)]
+    pub fn count() -> usize {
+        PREEMPT_COUNT.load(Ordering::Relaxed)
     }
 }
 
+#[no_mangle]
+pub extern "C" fn r_preempt_disable() {
+    preempt::disable();
+}
+
+#[no_mangle]
+pub extern "C" fn r_preempt_enable() {
+    preempt::enable();
+}
+
+#[no_mangle]
+pub extern "C" fn r_preempt_count() -> usize {
+    preempt::count()
+}
+
 pub type Spin<T> = lock::Lock<T, spin::SpinStrategy>;
 pub type Mutex<T> = lock::Lock<T, semaphore::SemaphoreStrategy<1>>;
 #[allow(dead_code)]
@@ -78,32 +98,18 @@ impl<T: Sized + Sync, U: ?Sized> Locked<T, U> {
 
 macro_rules! might_sleep {
     () => {
-        if cfg!(debug_assertions) {
-            if unsafe { $crate::bindings::root::kernel::async_::preempt_count() } != 0 {
-                println_fatal!("failed assertion");
-                unsafe { $crate::bindings::root::freeze() };
-            }
-        } else {
-            assert_eq!(
-                unsafe { $crate::bindings::root::kernel::async_::preempt_count() },
-                0,
-                "a might_sleep function called with preempt disabled"
-            );
-        }
+        assert_eq!(
+            $crate::sync::preempt::count(),
+            0,
+            "a might_sleep function called with preempt disabled"
+        );
     };
     ($n:expr) => {
-        if cfg!(debug_assertions) {
-            if unsafe { $crate::bindings::root::kernel::async_::preempt_count() } != $n {
-                println_fatal!("failed assertion");
-                unsafe { $crate::bindings::root::freeze() };
-            }
-        } else {
-            assert_eq!(
-                unsafe { $crate::bindings::root::kernel::async_::preempt_count() },
-                $n,
-                "a might_sleep function called with the preempt count not satisfying its requirement",
-            );
-        }
+        assert_eq!(
+            $crate::sync::preempt::count(),
+            $n,
+            "a might_sleep function called with the preempt count not satisfying its requirement",
+        );
     };
 }
 

+ 2 - 2
src/sync/condvar.rs

@@ -1,5 +1,5 @@
 use crate::{
-    kernel::task::{Scheduler, Thread, ThreadState},
+    kernel::task::{Scheduler, Thread},
     prelude::*,
     sync::preempt,
 };
@@ -7,7 +7,7 @@ use crate::{
 use super::{lock::Guard, strategy::LockStrategy};
 use alloc::{collections::vec_deque::VecDeque, sync::Arc};
 
-pub struct CondVar<const Interruptible: bool> {
+pub struct CondVar<const INTERRUPTIBLE: bool> {
     waiters: Spin<VecDeque<Arc<Thread>>>,
 }
 

+ 7 - 7
src/sync/lock.rs

@@ -96,7 +96,7 @@ impl<Value: ?Sized, Strategy: LockStrategy> Lock<Value, Strategy> {
     }
 }
 
-pub struct Guard<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool = true> {
+pub struct Guard<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool = true> {
     _phantom: core::marker::PhantomData<Strategy>,
     value: &'lock UnsafeCell<Value>,
     strategy_data: &'lock Strategy::StrategyData,
@@ -119,8 +119,8 @@ impl<'lock, Value: ?Sized, Strategy: LockStrategy> Guard<'lock, Value, Strategy>
     }
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> Deref
-    for Guard<'lock, Value, Strategy, Write>
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool> Deref
+    for Guard<'lock, Value, Strategy, WRITE>
 {
     type Target = Value;
 
@@ -137,8 +137,8 @@ impl<'lock, Value: ?Sized, Strategy: LockStrategy> DerefMut
     }
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> AsRef<Value>
-    for Guard<'lock, Value, Strategy, Write>
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool> AsRef<Value>
+    for Guard<'lock, Value, Strategy, WRITE>
 {
     fn as_ref(&self) -> &Value {
         unsafe { &*self.value.get() }
@@ -153,8 +153,8 @@ impl<'lock, Value: ?Sized, Strategy: LockStrategy> AsMut<Value>
     }
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy, const Write: bool> Drop
-    for Guard<'lock, Value, Strategy, Write>
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool> Drop
+    for Guard<'lock, Value, Strategy, WRITE>
 {
     fn drop(&mut self) {
         unsafe { Strategy::do_unlock(&self.strategy_data, &mut self.context) }

+ 0 - 180
src/types/elf.cpp

@@ -1,180 +0,0 @@
-#include <string>
-#include <vector>
-
-#include <errno.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <types/elf.hpp>
-
-#include <kernel/mem/mm_list.hpp>
-#include <kernel/mem/vm_area.hpp>
-#include <kernel/process.hpp>
-#include <kernel/vfs.hpp>
-#include <kernel/vfs/dentry.hpp>
-
-static inline void __user_push32(uintptr_t* sp, uint32_t d) {
-    // TODO: use copy_to_user
-    *(--*(uint32_t**)sp) = d;
-}
-
-static inline void __user_push_string32(uintptr_t* sp, const char* str) {
-    size_t len = strlen(str);
-
-    *sp -= (len + 1);
-    *sp &= ~0xf; // align to 16 bytes
-
-    memcpy((void*)*sp, str, len + 1);
-}
-
-int types::elf::elf32_load(types::elf::elf32_load_data& d) {
-    auto exec = fs::dentry_pointer{d.exec_dent};
-    if (!exec)
-        return -ENOENT;
-
-    types::elf::elf32_header hdr{};
-    auto n_read = fs::fs_read(exec.get(), (char*)&hdr, sizeof(types::elf::elf32_header), 0,
-                              sizeof(types::elf::elf32_header));
-
-    if (n_read != sizeof(types::elf::elf32_header))
-        return -EINVAL;
-
-    if (hdr.magic[0] != 0x7f || hdr.magic[1] != 'E' || hdr.magic[2] != 'L' || hdr.magic[3] != 'F')
-        return -EINVAL;
-
-    size_t phents_size = hdr.phentsize * hdr.phnum;
-    size_t shents_size = hdr.shentsize * hdr.shnum;
-    std::vector<types::elf::elf32_program_header_entry> phents(hdr.phnum);
-    n_read = fs::fs_read(exec.get(), (char*)phents.data(), phents_size, hdr.phoff, phents_size);
-
-    // broken file or I/O error
-    if (n_read != phents_size)
-        return -EINVAL;
-
-    std::vector<types::elf::elf32_section_header_entry> shents(hdr.shnum);
-    n_read = fs::fs_read(exec.get(), (char*)shents.data(), shents_size, hdr.shoff, shents_size);
-
-    // broken file or I/O error
-    if (n_read != shents_size)
-        return -EINVAL;
-
-    // from now on, caller process is gone.
-    // so we can't just simply return to it on error.
-    auto& mms = current_process->mms;
-    mms.clear();
-
-    uintptr_t data_segment_end = 0;
-
-    for (const auto& phent : phents) {
-        if (phent.type != types::elf::elf32_program_header_entry::PT_LOAD)
-            continue;
-
-        auto vaddr = phent.vaddr & ~0xfff;
-        auto vlen = ((phent.vaddr + phent.memsz + 0xfff) & ~0xfff) - vaddr;
-        auto flen = ((phent.vaddr + phent.filesz + 0xfff) & ~0xfff) - vaddr;
-        auto fileoff = phent.offset & ~0xfff;
-
-        using namespace kernel::mem;
-        if (flen) {
-            mm_list::map_args args{};
-
-            args.vaddr = vaddr;
-            args.length = flen;
-            args.file = fs::d_get(exec);
-            args.file_offset = fileoff;
-
-            args.flags = MM_MAPPED;
-            if (phent.flags & elf32_program_header_entry::PF_W)
-                args.flags |= MM_WRITE;
-
-            if (phent.flags & elf32_program_header_entry::PF_X)
-                args.flags |= MM_EXECUTE;
-
-            if (auto ret = mms.mmap(args); ret != 0)
-                return ELF_LOAD_FAIL_NORETURN;
-        }
-
-        if (vlen > flen) {
-            mm_list::map_args args{};
-
-            args.vaddr = vaddr + flen;
-            args.length = vlen - flen;
-
-            args.flags = MM_ANONYMOUS;
-            if (phent.flags & elf32_program_header_entry::PF_W)
-                args.flags |= MM_WRITE;
-
-            if (phent.flags & elf32_program_header_entry::PF_X)
-                args.flags |= MM_EXECUTE;
-
-            if (auto ret = mms.mmap(args); ret != 0)
-                return ELF_LOAD_FAIL_NORETURN;
-        }
-
-        if (vaddr + vlen > data_segment_end)
-            data_segment_end = vaddr + vlen;
-    }
-
-    current_process->mms.register_brk(data_segment_end + 0x10000);
-
-    for (const auto& shent : shents) {
-        if (shent.sh_type == elf32_section_header_entry::SHT_NOBITS)
-            memset((char*)(uintptr_t)shent.sh_addr, 0x00, shent.sh_size);
-    }
-
-    // map stack area
-    if (1) {
-        using namespace kernel::mem;
-        mm_list::map_args args{};
-
-        args.vaddr = ELF32_STACK_TOP;
-        args.length = ELF32_STACK_SIZE;
-        args.flags = MM_ANONYMOUS | MM_WRITE;
-
-        if (auto ret = mms.mmap(args); ret != 0)
-            return ELF_LOAD_FAIL_NORETURN;
-    }
-
-    d.ip = hdr.entry;
-    d.sp = ELF32_STACK_BOTTOM;
-
-    auto* sp = &d.sp;
-
-    // fill information block area
-    std::vector<elf32_addr_t> args, envs;
-    for (size_t i = 0; i < d.envp_count; ++i) {
-        __user_push_string32(sp, d.envp[i]);
-        envs.push_back((uintptr_t)*sp);
-    }
-    for (size_t i = 0; i < d.argv_count; ++i) {
-        __user_push_string32(sp, d.argv[i]);
-        args.push_back((uintptr_t)*sp);
-    }
-
-    // push null auxiliary vector entry
-    __user_push32(sp, 0);
-    __user_push32(sp, 0);
-
-    // push 0 for envp
-    __user_push32(sp, 0);
-
-    // push envp
-    for (auto ent : envs)
-        __user_push32(sp, ent);
-
-    // push 0 for argv
-    __user_push32(sp, 0);
-
-    // push argv
-    for (int i = args.size() - 1; i >= 0; --i)
-        __user_push32(sp, args[i]);
-
-    // push argc
-    __user_push32(sp, args.size());
-
-    // TODO!!!: rename current thread
-    current_thread->name = "[thread]";
-
-    return 0;
-}

+ 2 - 4
src/types/libstdcpp.cpp

@@ -17,9 +17,7 @@ extern "C" void NORETURN __cxa_pure_virtual(void) {
         ;
 }
 
-void NORETURN __assert_fail(const char* statement, const char* file, int line,
-                            const char* func) {
-    kmsgf("Kernel assertion failed: (%s), %s:%d, %s", statement, file, line,
-          func);
+void NORETURN __assert_fail(const char* statement, const char* file, int line, const char* func) {
+    (void)statement, (void)file, (void)line, (void)func;
     freeze();
 }