Browse Source

slab and buddy allocator

greatbridf 9 months ago
parent
commit
a092424c56

+ 5 - 5
CMakeLists.txt

@@ -32,7 +32,6 @@ set(BOOTLOADER_SOURCES src/boot.s
                        src/mbr.S
                        src/asm/interrupt.s
                        src/asm/port_io.s
-                       src/asm/sys.s
                        )
 
 set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
@@ -47,6 +46,8 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         # src/kernel/syscall.cpp
                         src/kernel/syscall/fileops.cc
                         src/kernel/syscall/mount.cc
+                        src/kernel/mem/paging.cc
+                        src/kernel/mem/slab.cc
                         src/kernel/mem.cpp
                         src/kernel/module.cc
                         src/kernel/vfs.cpp
@@ -64,7 +65,6 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         src/types/elf.cpp
                         src/types/libstdcpp.cpp
                         include/asm/port_io.h
-                        include/asm/sys.h
                         include/fs/fat.hpp
                         include/kernel/async/waitlist.hpp
                         include/kernel/async/lock.hpp
@@ -73,7 +73,9 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         include/kernel/irq.hpp
                         include/kernel/process.hpp
                         include/kernel/syscall.hpp
-                        include/kernel/mem.h
+                        include/kernel/mem/paging.hpp
+                        include/kernel/mem/slab.hpp
+                        include/kernel/mem/types.hpp
                         include/kernel/mm.hpp
                         include/kernel/module.hpp
                         include/kernel/utsname.hpp
@@ -99,8 +101,6 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         include/types/elf.hpp
                         include/types/hash_map.hpp
                         include/types/types.h
-                        include/types/size.h
-                        include/types/status.h
                         include/types/allocator.hpp
                         include/types/cplusplus.hpp
                         include/kernel/log.hpp

+ 17 - 9
doc/mem_layout.txt

@@ -9,18 +9,26 @@ physical memory
 0x102000 - 0x103000 : kernel PDPT for kernel space
 0x103000 - 0x104000 : kernel PD for kernel image
 0x104000 - 0x105000 : kernel PT for kernel image
+0x105000 - 0x106000 : kernel PD for struct page array#1
 
 0x106000 - 0x200000 : unused empty pages
-0x200000 - 0x300000 : first kernel bss page (2MB)
+0x200000 - 0x400000 : first kernel bss page (2MB)
 
 
 virtual address space
 
-0xffffff0000000000 - 0xffffff3fffffffff  256GB physical memory (cached)
-0xffffff4000000000 - 0xffffff7fffffffff  256GB physical memory (not cached)
-0xffffff8000000000 - 0xffffffbfffffffff  256GB kernel bss
-
-0xffffffc000000000 - 0xffffffffffbfffff ~256GB unused
-
-0xffffffffffc00000 - 0xffffffffffdfffff    2MB kernel image
-0xffffffffffe00000 - 0xffffffffffffffff    2MB unused
+0xffff ff0 000 000 000 - 0xffff ff3 fff fff fff  256GB physical memory (cached)
+0xffff ff4 000 000 000 - 0xffff ff7 fff fff fff  256GB physical memory (not cached)
+0xffff ff8 000 000 000 - 0xffff ff8 03f fff fff    1GB unused
+0xffff ff8 040 000 000 - 0xffff ff8 13f fff fff    4GB struct page array
+0xffff ff8 140 000 000 - 0xffff ff8 17f fff fff    1GB unused
+0xffff ff8 180 000 000 - 0xffff ffb fff fff fff  250GB kernel heap
+
+0xffff ffc 000 000 000 - 0xffff ffc 03f fff fff    1GB unused
+0xffff ffc 040 000 000 - 0xffff fff fbf fff fff  254GB kernel stack
+
+0xffff fff fc0 000 000 - 0xffff fff fc0 1ff fff    2MB unused
+0xffff fff fc0 200 000 - 0xffff fff fff 9ff fff 1016MB kernel bss
+0xffff fff fff a00 000 - 0xffff fff fff bff fff    2MB unused
+0xffff fff fff c00 000 - 0xffff fff fff dff fff    2MB kernel image
+0xffff fff fff e00 000 - 0xffff fff fff fff fff    2MB unused

+ 1 - 1
gblibc/include/stdint.h

@@ -22,7 +22,7 @@ typedef __UINTPTR_TYPE__ uintptr_t;
 typedef __INTPTR_TYPE__ intptr_t;
 
 typedef __SIZE_TYPE__ size_t;
-typedef int32_t ssize_t;
+typedef int64_t ssize_t;
 
 typedef uint64_t time_t;
 typedef int64_t time_diff_t;

+ 8 - 0
include/asm/port_io.h

@@ -30,6 +30,14 @@ extern void asm_hlt(void);
 extern void asm_cli(void);
 extern void asm_sti(void);
 
+// Stop here forever: loops calling asm_cli() followed by asm_hlt().
+// `static` gives each translation unit its own copy, so C callers of this
+// header do not depend on an external definition of a plain `inline`
+// function existing somewhere else.
+static inline void NORETURN die(void)
+{
+    // keep looping in case asm_hlt() ever returns
+    for (;;) {
+        asm_cli();
+        asm_hlt();
+    }
+}
+
 #ifdef __cplusplus
 }
 #endif

+ 0 - 27
include/asm/sys.h

@@ -1,27 +0,0 @@
-#pragma once
-
-#include <kernel/mem.h>
-#include <types/types.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void asm_switch_pd(page_t pd_addr);
-void asm_enable_paging(pd_t pd_addr);
-
-pptr_t current_pd(void);
-
-// the limit should be set on the higher 16bit
-// e.g. (n * sizeof(segment_descriptor) - 1) << 16
-void asm_load_gdt(uint32_t limit, pptr_t addr);
-
-void asm_load_tr(uint16_t index);
-
-extern const uint32_t kernel_size;
-extern char* const bss_addr;
-extern const uint32_t bss_len;
-
-#ifdef __cplusplus
-}
-#endif

+ 0 - 3
include/fs/fat.hpp

@@ -7,9 +7,6 @@
 #include <string.h>
 #include <sys/types.h>
 
-#include <types/size.h>
-
-#include <kernel/mem.h>
 #include <kernel/vfs.hpp>
 
 namespace fs::fat {

+ 0 - 141
include/kernel/mem.h

@@ -1,141 +0,0 @@
-#pragma once
-
-#include <stdint.h>
-#include <types/size.h>
-
-#define PAGE_SIZE (0x1000)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// don't forget to add the initial 1m to the total
-struct mem_size_info {
-    uint16_t n_1k_blks; // memory between 1m and 16m in 1k blocks
-    uint16_t n_64k_blks; // memory above 16m in 64k blocks
-};
-
-struct e820_mem_map_entry_20 {
-    uint64_t base;
-    uint64_t len;
-    uint32_t type;
-};
-
-struct e820_mem_map_entry_24 {
-    struct e820_mem_map_entry_20 in;
-    uint32_t acpi_extension_attr;
-};
-
-/*
- * page directory entry
- *
- * p   : present (1)
- * rw  : allow write (1)
- * us  : allow user access (1)
- * pwt : todo
- * pcd : todo
- * a   : accessed for linear address translation (1)
- * d   : dirty (1) (ignored)
- * ps  : use 4MiB pages (ignored)
- * addr: page table address
- */
-typedef union pde_t {
-    uint32_t v;
-    struct {
-        uint32_t p : 1;
-        uint32_t rw : 1;
-        uint32_t us : 1;
-        uint32_t pwt : 1;
-        uint32_t pcd : 1;
-        uint32_t a : 1;
-        uint32_t d : 1;
-        uint32_t ps : 1;
-        uint32_t ignored : 4;
-        page_t pt_page : 20;
-    } in;
-} pde_t;
-typedef pde_t (*pd_t)[1024];
-
-/*
- * page table entry
- *
- * p   : present (1)
- * rw  : allow write (1)
- * us  : allow user access (1)
- * pwt : todo
- * pcd : todo
- * a   : accessed for linear address translation (1)
- * d   : dirty (1)
- * pat : todo (ignored)
- * g   : used in cr4 mode (ignored)
- * addr: physical memory address
- */
-typedef union pte_t {
-    uint32_t v;
-    struct {
-        uint32_t p : 1;
-        uint32_t rw : 1;
-        uint32_t us : 1;
-        uint32_t pwt : 1;
-        uint32_t pcd : 1;
-        uint32_t a : 1;
-        uint32_t d : 1;
-        uint32_t pat : 1;
-        uint32_t g : 1;
-        uint32_t ignored : 3;
-        page_t page : 20;
-    } in;
-} pte_t;
-typedef pte_t (*pt_t)[1024];
-
-// in mem.cpp
-extern uint8_t e820_mem_map[1024];
-extern uint32_t e820_mem_map_count;
-extern uint32_t e820_mem_map_entry_size;
-extern struct mem_size_info mem_size_info;
-
-#define KERNEL_HEAP_START ((void*)0xd0000000)
-#define KERNEL_HEAP_LIMIT ((void*)0xd4000000)
-
-#define EARLY_KERNEL_PD_PAGE ((page_t)0x000001)
-
-void init_mem(void);
-
-#define KERNEL_CODE_SEGMENT (0x08)
-#define KERNEL_DATA_SEGMENT (0x10)
-#define USER_CODE_SEGMENT (0x18)
-#define USER_DATA_SEGMENT (0x20)
-#define USER_CODE_SELECTOR (USER_CODE_SEGMENT | 3)
-#define USER_DATA_SELECTOR (USER_DATA_SEGMENT | 3)
-
-#define SD_TYPE_CODE_SYSTEM (0x9a)
-#define SD_TYPE_DATA_SYSTEM (0x92)
-
-#define SD_TYPE_CODE_USER (0xfa)
-#define SD_TYPE_DATA_USER (0xf2)
-
-#define SD_TYPE_TSS (0x89)
-
-typedef struct segment_descriptor_struct {
-    uint64_t limit_low : 16;
-    uint64_t base_low : 16;
-    uint64_t base_mid : 8;
-    uint64_t access : 8;
-    uint64_t limit_high : 4;
-    uint64_t flags : 4;
-    uint64_t base_high : 8;
-} segment_descriptor;
-
-// in mem.cpp
-extern segment_descriptor gdt[7];
-
-void create_segment_descriptor(
-    segment_descriptor* sd,
-    uint32_t base,
-    uint32_t limit,
-    uint32_t flags,
-    uint32_t access);
-
-#ifdef __cplusplus
-}
-#endif

+ 107 - 0
include/kernel/mem/paging.hpp

@@ -0,0 +1,107 @@
+#pragma once
+
+#include <bit>
+#include <tuple>
+#include <cstddef>
+
+#include <stdint.h>
+
+#include <types/types.h>
+
+#include <kernel/mem/phys.hpp>
+
+namespace kernel::mem::paging {
+
+constexpr int idx_p5(uintptr_t vaddr) noexcept { return (vaddr >> 48) & 0x1ff; } // 9-bit index from bits 48..56 of vaddr
+constexpr int idx_p4(uintptr_t vaddr) noexcept { return (vaddr >> 39) & 0x1ff; } // 9-bit index from bits 39..47 of vaddr
+constexpr int idx_p3(uintptr_t vaddr) noexcept { return (vaddr >> 30) & 0x1ff; } // 9-bit index from bits 30..38 of vaddr
+constexpr int idx_p2(uintptr_t vaddr) noexcept { return (vaddr >> 21) & 0x1ff; } // 9-bit index from bits 21..29 of vaddr
+constexpr int idx_p1(uintptr_t vaddr) noexcept { return (vaddr >> 12) & 0x1ff; } // 9-bit index from bits 12..20 of vaddr
+
+// Gathers all five table indices of vaddr, ordered from idx_p5 down to idx_p1.
+constexpr std::tuple<int, int, int, int, int> idx_all(uintptr_t vaddr) noexcept
+{
+    const int p5 = idx_p5(vaddr);
+    const int p4 = idx_p4(vaddr);
+    const int p3 = idx_p3(vaddr);
+    const int p2 = idx_p2(vaddr);
+    const int p1 = idx_p1(vaddr);
+
+    return std::make_tuple(p5, p4, p3, p2, p1);
+}
+
+// page frame number
+// since we have large pages now, pfns are not shifted right
+using pfn_t = uintptr_t;
+
+// paging structure attributes
+using psattr_t = uintptr_t;
+
+constexpr psattr_t PA_P    = 0x0000000000000001ULL; // bit 0: present
+constexpr psattr_t PA_RW   = 0x0000000000000002ULL; // bit 1: allow write
+constexpr psattr_t PA_US   = 0x0000000000000004ULL; // bit 2: allow user access
+constexpr psattr_t PA_PWT  = 0x0000000000000008ULL; // bit 3
+constexpr psattr_t PA_PCD  = 0x0000000000000010ULL; // bit 4
+constexpr psattr_t PA_A    = 0x0000000000000020ULL; // bit 5: accessed
+constexpr psattr_t PA_D    = 0x0000000000000040ULL; // bit 6: dirty
+constexpr psattr_t PA_PS   = 0x0000000000000080ULL; // bit 7: page size (presumably marks a large page - confirm)
+constexpr psattr_t PA_G    = 0x0000000000000100ULL; // bit 8
+constexpr psattr_t PA_COW  = 0x0000000000000200ULL; // copy on write
+constexpr psattr_t PA_MMAP = 0x0000000000000400ULL; // memory mapped
+constexpr psattr_t PA_FRE  = 0x0000000000000800ULL; // unused flag
+constexpr psattr_t PA_NXE  = 0x8000000000000000ULL; // bit 63 (presumably no-execute - confirm)
+constexpr psattr_t PA_MASK = 0xfff0000000000fffULL; // low 12 + high 12 bits; PSE::set()/parse() use it to split attributes from the address
+
+namespace __inner {
+    using pse_t = uint64_t; // raw storage type of one paging structure entry
+
+} // namespace __inner
+
+// Handle to one paging structure entry, identified by the physical address
+// of the entry itself. Every read and write goes through physaddr's pointer
+// conversion.
+class PSE {
+    physaddr<__inner::pse_t> m_ptrbase;
+
+public:
+    // pptr: physical address of the entry this handle refers to
+    explicit constexpr PSE(uintptr_t pptr) noexcept : m_ptrbase{pptr} {}
+
+    // Zero the whole entry.
+    constexpr void clear() noexcept
+    {
+        *m_ptrbase = 0;
+    }
+
+    // Overwrite the entry. Bits inside PA_MASK are taken from `attributes`,
+    // all remaining bits from `pfn`.
+    constexpr void set(psattr_t attributes, pfn_t pfn) noexcept
+    {
+        *m_ptrbase = (attributes & PA_MASK) | (pfn & ~PA_MASK);
+    }
+
+    // Treat this handle as entry 0 of a table and return a handle to the
+    // entry `nth` slots after it.
+    constexpr PSE operator[](std::size_t nth) const noexcept
+    {
+        return PSE{m_ptrbase.phys() + sizeof(__inner::pse_t) * nth};
+    }
+
+    // Read the entry, strip the PA_MASK bits and return a handle located at
+    // the remaining physical address.
+    constexpr PSE parse() const noexcept
+    {
+        return PSE{*m_ptrbase & ~PA_MASK};
+    }
+};
+
+constexpr PSE KERNEL_PAGE_TABLE{0x100000}; // entry handle at physical 0x100000 (presumably the kernel's top-level table - confirm)
+
+constexpr unsigned long PAGE_PRESENT = 0x00000001; // presumably a bit for page::flags - confirm
+constexpr unsigned long PAGE_BUDDY   = 0x00000002; // presumably a bit for page::flags - confirm
+
+struct page { // bookkeeping record for one page; the allocator functions in this header hand out page*
+    refcount_t refcount; // reference counter
+    unsigned long flags; // presumably a combination of PAGE_* bits - confirm
+
+    page* next; // link to another page record
+
+    // padding
+    uint64_t padding;
+};
+
+inline page* PAGE_ARRAY; // pointer to the first struct page record
+
+void create_zone(uintptr_t start, uintptr_t end); // NOTE(review): presumably registers the range start..end with the page allocator - confirm
+
+// order represents power of 2
+page* alloc_page();
+page* alloc_pages(int order);
+void free_page(page* page, int order);
+
+pfn_t page_to_pfn(page* page); // convert a page record to its pfn
+page* pfn_to_page(pfn_t pfn); // convert a pfn to its page record
+
+} // namespace kernel::mem::paging

+ 46 - 0
include/kernel/mem/phys.hpp

@@ -0,0 +1,46 @@
+#pragma once
+
+#include <bit>
+#include <cstddef>
+#include <type_traits>
+
+#include <stdint.h>
+
+#include <types/types.h>
+
+namespace kernel::mem {
+
+// Wrapper around a physical address. Converting to a pointer adds
+// PHYS_OFFSET, which is 0xffffff0000000000 when Cached is true and
+// 0xffffff4000000000 otherwise.
+//
+// NOTE: a physaddr built from nullptr stores 0, so it converts to
+// PHYS_OFFSET and NOT to a null pointer; do not test the converted pointer
+// for null.
+template <typename T, bool Cached = true>
+class physaddr {
+    static constexpr uintptr_t PHYS_OFFSET =
+        Cached ? 0xffffff0000000000ULL : 0xffffff4000000000ULL;
+
+    uintptr_t m_ptr;
+
+public:
+    explicit constexpr physaddr(uintptr_t ptr) noexcept : m_ptr{ptr} {}
+    explicit constexpr physaddr(std::nullptr_t) noexcept : m_ptr{} {}
+
+    // cast to non-pointer types is prohibited
+    template <typename U, typename = std::enable_if_t<std::is_pointer_v<U>>>
+    constexpr U cast_to() const noexcept
+    {
+        return std::bit_cast<U>(m_ptr + PHYS_OFFSET);
+    }
+
+    // implicit conversion so the wrapper can be used where a T* is expected
+    constexpr operator T*() const noexcept
+    {
+        return cast_to<T*>();
+    }
+
+    constexpr T* operator->() const noexcept
+    {
+        return cast_to<T*>();
+    }
+
+    // the raw physical address, without PHYS_OFFSET
+    constexpr uintptr_t phys() const noexcept
+    {
+        return m_ptr;
+    }
+};
+
+} // namespace kernel::mem

+ 55 - 0
include/kernel/mem/slab.hpp

@@ -0,0 +1,55 @@
+#pragma once
+
+#include <cstddef>
+#include <type_traits>
+
+#include <stdint.h>
+
+#include "paging.hpp"
+#include "phys.hpp"
+
+namespace kernel::mem {
+
+struct slab_cache;
+
+struct slab_head { // header describing one slab; slab_cache keeps lists of these
+    slab_cache* cache; // the cache this slab is associated with
+
+    slab_head* next; // list links (next / prev)
+    slab_head* prev;
+
+    void* free; // presumably the first free object in this slab - confirm
+
+    unsigned int free_count; // presumably the number of free objects left - confirm
+    unsigned int obj_size; // object size for this slab
+};
+
+struct slab_cache { // one cache of equally sized objects: three slab lists plus the object size
+    slab_head* slabs_empty; // NOTE(review): whether "empty" means no objects in use or no objects free is not visible here - confirm
+    slab_head* slabs_partial;
+    slab_head* slabs_full;
+
+    std::size_t obj_size; // object size served by this cache
+};
+
+// Allocator type for standard containers.
+//
+// NOTE: allocate()/deallocate() currently forward to the global
+// operator new / operator delete; slab_alloc()/slab_free() are not used here.
+//
+// The members must be public: allocator_traits and the containers access
+// value_type, allocate() and deallocate() from outside the class.
+template <typename T>
+class slab_allocator {
+public:
+    using value_type = T;
+    using propagate_on_container_move_assignment = std::true_type;
+
+    constexpr slab_allocator() noexcept = default;
+
+    // rebinding conversion: containers build allocators for their internal
+    // node types from the allocator they were given
+    template <typename U>
+    constexpr slab_allocator(const slab_allocator<U>&) noexcept {}
+
+    // throws std::bad_alloc
+    [[nodiscard]] constexpr T* allocate(std::size_t n)
+    { return static_cast<T*>(::operator new(n * sizeof(T))); }
+
+    // TODO: check allocated size
+    constexpr void deallocate(T* ptr, std::size_t)
+    { ::operator delete(ptr); }
+};
+
+// the allocator is stateless, so any two instances are interchangeable
+template <typename T, typename U>
+constexpr bool operator==(const slab_allocator<T>&, const slab_allocator<U>&) noexcept
+{ return true; }
+
+template <typename T, typename U>
+constexpr bool operator!=(const slab_allocator<T>&, const slab_allocator<U>&) noexcept
+{ return false; }
+
+void init_slab_cache(slab_cache* cache, std::size_t obj_size); // initialise `cache` for objects of `obj_size`
+void slab_add_page(slab_cache* cache, paging::pfn_t pfn); // presumably gives the page at `pfn` to `cache` - confirm
+
+void* slab_alloc(slab_cache* cache); // obtain one object from `cache`
+void slab_free(void* ptr); // takes no cache argument; presumably the slab is located from `ptr` - confirm
+
+} // namespace kernel::mem

+ 36 - 0
include/kernel/mem/types.hpp

@@ -0,0 +1,36 @@
+#pragma once
+
+#include <stdint.h>
+
+#include <cstddef>
+
+namespace kernel::mem {
+
+struct gdt_entry { // one descriptor; the bitfield widths below add up to 64 bits
+    uint64_t limit_low : 16; // limit is split: 16 bits here + 4 bits in limit_high
+    uint64_t base_low : 16; // base is split: 16 + 8 + 8 bits (low / mid / high)
+    uint64_t base_mid : 8;
+    uint64_t access : 8;
+    uint64_t limit_high : 4;
+    uint64_t flags : 4;
+    uint64_t base_high : 8;
+};
+
+struct e820_mem_map_entry { // one memory map entry; the fields take 8 + 8 + 4 + 4 = 24 bytes
+    uint64_t base;
+    uint64_t len;
+    uint32_t type;
+
+    // might not be valid
+    uint32_t acpi_extension_attr; // only the 24-byte entry layout carries this field
+};
+
+namespace info { // memory information globals
+    inline std::size_t memory_size;
+    inline std::size_t e820_entry_count;
+    inline std::size_t e820_entry_length; // presumably the size in bytes of one reported entry (20 or 24) - confirm
+    inline e820_mem_map_entry e820_entries[(1024-16)/24]; // (1024-16)/24 = 42 entries of 24 bytes each
+
+} // namespace info
+
+} // namespace kernel::mem

+ 37 - 177
include/kernel/mm.hpp

@@ -6,35 +6,14 @@
 #include <cstddef>
 #include <utility>
 
-#include <kernel/mem.h>
+#include <kernel/mem/paging.hpp>
 #include <kernel/vfs.hpp>
 #include <stdint.h>
 #include <types/allocator.hpp>
 #include <types/cplusplus.hpp>
-#include <types/size.h>
-#include <types/status.h>
 #include <types/types.h>
 
-#define invalidate_tlb(addr) asm volatile("invlpg (%0)" \
-                                 :             \
-                                 : "r"(addr)   \
-                                 : "memory")
-
-constexpr size_t THREAD_KERNEL_STACK_SIZE = 8 * PAGE_SIZE;
-
-constexpr uint32_t PAGE_COW = (1 << 0);
-constexpr uint32_t PAGE_MMAP = (1 << 1);
-#define PAGE_COW PAGE_COW
-#define PAGE_MMAP PAGE_MMAP
-
-struct page {
-    page_t phys_page_id;
-    size_t* ref_count;
-    // 0 :11 : pte_index
-    // 12:31 : pt_page
-    uint32_t pg_pteidx;
-    mutable uint32_t attr;
-};
+#define invalidate_tlb(addr) asm volatile("invlpg (%0)": : "r"(addr) : "memory")
 
 // private memory mapping
-// changes won't be neither written back to file nor shared between processes
+// changes will be neither written back to the file nor shared between processes
@@ -50,23 +29,10 @@ int mmap(
     int write,
     int priv);
 
-template <uint32_t base, uint32_t expo>
-constexpr uint32_t pow()
-{
-    if constexpr (expo == 0)
-        return 1;
-    if constexpr (expo == 1)
-        return base;
-    if constexpr (expo % 2 == 0)
-        return pow<base, expo / 2>() * pow<base, expo / 2>();
-    else
-        return pow<base, expo / 2>() * pow<base, expo / 2 + 1>();
-}
-
 template <int N>
 constexpr std::size_t align_down(std::size_t v)
 {
-    return v & ~(pow<2, N>() - 1);
+    return v & ~((1 << N) - 1);
 }
 template <int N>
 constexpr void* align_down(void* v)
@@ -76,7 +42,7 @@ constexpr void* align_down(void* v)
 template <int N>
 constexpr std::size_t align_up(std::size_t v)
 {
-    return align_down<N>(v + pow<2, N>() - 1);
+    return align_down<N>(v + (1 << N) - 1);
 }
 template <int N>
 constexpr void* align_up(void* v)
@@ -97,52 +63,18 @@ constexpr void* vptradd(void* p, std::size_t off)
     return _p + off;
 }
 
-void dealloc_pd(page_t pd);
+// TODO: LONG MODE
+// void dealloc_pd(page_t pd);
 
 // allocate a struct page together with the raw page
-page allocate_page(void);
-void free_page(page* pg);
-
-// TODO: this is for alloc_kstack()
-// CHANGE THIS
-page_t __alloc_raw_page(void);
-void __free_raw_page(page_t pg);
+kernel::mem::paging::page allocate_page(void);
+void free_page(kernel::mem::paging::page* pg);
 
 namespace kernel {
 
-void* pmap(page_t pg, bool cached = true);
-void pfree(page_t pg);
-
-class paccess : public types::non_copyable {
-private:
-    page_t m_pg;
-    void* m_ptr;
-
-public:
-    paccess(void) = delete;
-    paccess(paccess&&) = delete;
-    paccess& operator=(paccess&&) = delete;
-
-    inline explicit paccess(page_t pg, bool cached = true)
-        : m_pg(pg)
-    {
-        m_ptr = pmap(pg, cached);
-    }
-
-    constexpr void* ptr(void) const { return m_ptr; }
-
-    ~paccess()
-    {
-        pfree(m_pg);
-    }
-};
-
-namespace memory {
+namespace mem {
 
 struct mm {
-public:
-    using pages_vector = std::vector<page, types::memory::ident_allocator<page>>;
-
 public:
     void* start {};
     struct mm_attr {
@@ -150,13 +82,13 @@ public:
         uint32_t system : 1;
         uint32_t mapped : 1;
     } attr {};
-    pages_vector* pgs {};
     fs::inode* mapped_file {};
     size_t file_offset {};
+    std::size_t page_count;
 
 public:
     constexpr void* end() const noexcept
-    { return vptradd(start, pgs->size() * PAGE_SIZE); }
+    { return vptradd(start, page_count * 4096); } // TODO: LONG MODE
     constexpr bool is_kernel_space() const noexcept
     { return attr.system; }
     constexpr bool is_avail(void* ostart, void* oend) const noexcept
@@ -167,7 +99,7 @@ public:
         return (ostart >= m_end || oend <= m_start);
     }
 
-    void append_page(pd_t pd, const page& pg, uint32_t attr, bool priv);
+    // void append_page(pd_t pd, const page& pg, uint32_t attr, bool priv); TODO: LONG MODE
 
     /**
      * @brief Splits the memory block at the specified address.
@@ -197,7 +129,8 @@ private:
     };
 
 public:
-    using list_type = std::set<mm, comparator, types::memory::ident_allocator<mm>>;
+    // TODO: LONG MODE: use slab allocator
+    using list_type = std::set<mm, comparator>;
     using iterator = list_type::iterator;
     using const_iterator = list_type::const_iterator;
 
@@ -206,12 +139,12 @@ public:
 
 private:
     list_type m_areas;
-    page_t m_pd;
+    kernel::mem::paging::pfn_t m_pd;
     mm* m_brk {};
 
 public:
     // for system initialization only
-    explicit constexpr mm_list(page_t pd)
+    explicit constexpr mm_list(kernel::mem::paging::pfn_t pd)
         : m_pd(pd) { }
 
     // default constructor copies kernel_mms
@@ -242,7 +175,6 @@ public:
                 .system = system,
                 .mapped = 0,
             },
-            .pgs = types::memory::kinew<mm::pages_vector>(),
         });
         assert(inserted);
         return *iter;
@@ -259,34 +191,36 @@ public:
                 continue;
             }
 
-            this->unmap(*iter);
+            // TODO: LONG MODE
+            // this->unmap(*iter);
             iter = m_areas.erase(iter);
         }
         m_brk = nullptr;
     }
 
-    inline void unmap(mm& area)
-    {
-        int i = 0;
+    // TODO: LONG MODE
+    // inline void unmap(mm& area)
+    // {
+    //     int i = 0;
 
-        // TODO:
-        // if there are more than 4 pages, calling invlpg
-        // should be faster. otherwise, we use movl cr3
-        // bool should_invlpg = (area->pgs->size() > 4);
+    //     // TODO:
+    //     // if there are more than 4 pages, calling invlpg
+    //     // should be faster. otherwise, we use movl cr3
+    //     // bool should_invlpg = (area->pgs->size() > 4);
 
-        for (auto& pg : *area.pgs) {
-            kernel::paccess pa(pg.pg_pteidx >> 12);
-            auto pt = (pt_t)pa.ptr();
-            assert(pt);
-            auto* pte = *pt + (pg.pg_pteidx & 0xfff);
-            pte->v = 0;
+    //     for (auto& pg : *area.pgs) {
+    //         kernel::paccess pa(pg.pg_pteidx >> 12);
+    //         auto pt = (pt_t)pa.ptr();
+    //         assert(pt);
+    //         auto* pte = *pt + (pg.pg_pteidx & 0xfff);
+    //         pte->v = 0;
 
-            free_page(&pg);
+    //         free_page(&pg);
 
-            invalidate_tlb((std::size_t)area.start + (i++) * PAGE_SIZE);
-        }
-        types::memory::kidelete<mm::pages_vector>(area.pgs);
-    }
+    //         invalidate_tlb((std::size_t)area.start + (i++) * PAGE_SIZE);
+    //     }
+    //     types::memory::kidelete<mm::pages_vector>(area.pgs);
+    // }
 
     constexpr mm* find(void* lp)
     {
@@ -324,77 +258,3 @@ public:
-} // namespace memory
+} // namespace mem
 
 } // namespace kernel
-
-// global variables
-inline page empty_page;
-// --------------------------------
-
-// inline constexpr page* lto_page(mm* mm_area, void* l_ptr)
-// {
-//     size_t offset = vptrdiff(l_ptr, mm_area->start);
-//     return &mm_area->pgs->at(offset / PAGE_SIZE);
-// }
-// inline constexpr page_t to_page(pptr_t ptr)
-// {
-//     return ptr >> 12;
-// }
-// inline constexpr size_t to_pdi(page_t pg)
-// {
-//     return pg >> 10;
-// }
-// inline constexpr size_t to_pti(page_t pg)
-// {
-//     return pg & (1024 - 1);
-// }
-// inline constexpr pptr_t to_pp(page_t p)
-// {
-//     return p << 12;
-// }
-constexpr std::size_t v_to_pdi(void* addr)
-{
-    return std::bit_cast<std::size_t>(addr) >> 22;
-}
-constexpr std::size_t v_to_pti(void* addr)
-{
-    return (std::bit_cast<std::size_t>(addr) >> 12) & 0x3ff;
-}
-// inline constexpr pte_t* to_pte(pt_t pt, page_t pg)
-// {
-//     return *pt + to_pti(pg);
-// }
-// inline void* to_vp(page_t pg)
-// {
-//     return ptovp(to_pp(pg));
-// }
-// inline pd_t to_pd(page_t pg)
-// {
-//     return reinterpret_cast<pd_t>(to_vp(pg));
-// }
-// inline pt_t to_pt(page_t pg)
-// {
-//     return reinterpret_cast<pt_t>(to_vp(pg));
-// }
-// inline pt_t to_pt(pde_t* pde)
-// {
-//     return to_pt(pde->in.pt_page);
-// }
-// inline pde_t* to_pde(pd_t pd, void* addr)
-// {
-//     return *pd + lto_pdi((pptr_t)addr);
-// }
-// inline pte_t* to_pte(pt_t pt, void* addr)
-// {
-//     return *pt + lto_pti((pptr_t)addr);
-// }
-// inline pte_t* to_pte(pde_t* pde, void* addr)
-// {
-//     return to_pte(to_pt(pde), addr);
-// }
-// inline pte_t* to_pte(pd_t pd, void* addr)
-// {
-//     return to_pte(to_pde(pd, addr), addr);
-// }
-// inline pte_t* to_pte(pde_t* pde, page_t pg)
-// {
-//     return to_pte(to_pt(pde), pg);
-// }

+ 1 - 3
include/kernel/process.hpp

@@ -19,13 +19,11 @@
 #include <types/allocator.hpp>
 #include <types/cplusplus.hpp>
 #include <types/path.hpp>
-#include <types/status.h>
 #include <types/types.h>
 
 #include <kernel/async/waitlist.hpp>
 #include <kernel/interrupt.h>
 #include <kernel/mm.hpp>
-#include <kernel/mem.h>
 #include <kernel/user/thread_local.hpp>
 #include <kernel/signal.hpp>
 #include <kernel/task.h>
@@ -175,7 +173,7 @@ public:
     };
 
 public:
-    kernel::memory::mm_list mms {};
+    kernel::mem::mm_list mms {};
     std::set<kernel::task::thread> thds;
     kernel::async::wait_list waitlist;
 

+ 2 - 1
include/kernel/task/thread.hpp

@@ -47,7 +47,8 @@ public:
 
     std::string name {};
 
-    segment_descriptor tls_desc {};
+    // TODO: LONG MODE
+    // segment_descriptor tls_desc {};
 
     explicit thread(std::string name, pid_t owner);
     thread(const thread& val, pid_t owner);

+ 2 - 3
include/kernel/user/thread_local.hpp

@@ -1,7 +1,5 @@
 #pragma once
 
-#include <kernel/mem.h>
-
 #include <stdint.h>
 
 namespace kernel::user {
@@ -18,6 +16,7 @@ struct user_desc {
     uint32_t useable : 1;
 };
 
-void load_thread_area(const segment_descriptor& desc);
+// TODO: LONG MODE
+// void load_thread_area(const segment_descriptor& desc);
 
 } // namespace kernel::user

+ 0 - 41
include/types/allocator.hpp

@@ -11,12 +11,6 @@
 
 #include <kernel/async/lock.hpp>
 
-namespace kernel::kinit {
-
-void init_kernel_heap(void* start, std::size_t size);
-
-} // namespace kernel::kinit
-
 namespace types::memory {
 
 class brk_memory_allocator {
@@ -48,39 +42,4 @@ public:
     void deallocate(void* ptr);
 };
 
-void* kimalloc(std::size_t size);
-void kifree(void* ptr);
-
-template <typename T>
-struct ident_allocator {
-    using value_type = T;
-    using propagate_on_container_move_assignment = std::true_type;
-
-    constexpr ident_allocator() = default;
-
-    template <typename U>
-    constexpr ident_allocator(const ident_allocator<U>&) noexcept {}
-    
-    inline T* allocate(std::size_t n)
-    { return (T*)kimalloc(n * sizeof(T)); }
-    inline void deallocate(T* ptr, std::size_t) { return kifree(ptr); }
-};
-
-template <typename T, typename... Args>
-constexpr T* kinew(Args&&... args)
-{
-    ident_allocator<T> alloc { };
-    T* ptr = std::allocator_traits<ident_allocator<T>>::allocate(alloc, 1);
-    std::allocator_traits<ident_allocator<T>>::construct(alloc, ptr, std::forward<Args>(args)...);
-    return ptr;
-}
-
-template <typename T>
-constexpr void kidelete(T* ptr)
-{
-    ident_allocator<T> alloc { };
-    std::allocator_traits<ident_allocator<T>>::destroy(alloc, ptr);
-    std::allocator_traits<ident_allocator<T>>::deallocate(alloc, ptr, 1);
-}
-
 } // namespace types::memory

+ 0 - 2
include/types/elf.hpp

@@ -4,8 +4,6 @@
 #include <kernel/process.hpp>
 #include <kernel/vfs.hpp>
 #include <stdint.h>
-#include <types/size.h>
-#include <types/status.h>
 
 namespace types::elf {
 using elf32_addr_t = uint32_t;

+ 1 - 1
include/types/hash_map.hpp

@@ -57,7 +57,7 @@ template <typename T,
     std::enable_if_t<std::is_pointer_v<T> && !is_c_string_v<T>, bool> = true>
 inline hash_t hash(T val, std::size_t bits)
 {
-    return hash(std::bit_cast<ptr_t>(val), bits);
+    return hash(std::bit_cast<uintptr_t>(val), bits);
 }
 
 inline hash_t hash(const char* str, std::size_t bits)

+ 0 - 15
include/types/size.h

@@ -1,15 +0,0 @@
-#pragma once
-
-#include "stdint.h"
-
-#ifdef __GNUC__
-#define PACKED __attribute__((__packed__))
-#else
-#error "no definition for ((PACKED))"
-#endif
-
-typedef size_t ptr_t;
-typedef ssize_t diff_t;
-
-typedef ptr_t pptr_t;
-typedef ssize_t page_t;

+ 0 - 4
include/types/status.h

@@ -1,4 +0,0 @@
-#pragma once
-
-#define GB_OK (0)
-#define GB_FAILED (1)

+ 8 - 2
include/types/types.h

@@ -1,7 +1,5 @@
 #pragma once
 
-#include "size.h"
-#include "status.h"
 #include "stdint.h"
 
 #define __user
@@ -18,6 +16,12 @@
 #error "no definition for ((SECTION))"
 #endif
 
+#ifdef __GNUC__
+#define PACKED __attribute__((__packed__))
+#else
+#error "no definition for ((PACKED))"
+#endif
+
 #ifdef __GNUC__
 #define likely(expr) (__builtin_expect(!!(expr), 1))
 #define unlikely(expr) (__builtin_expect(!!(expr), 0))
@@ -26,6 +30,8 @@
 #define unlikely(expr) (!!(expr))
 #endif
 
+typedef size_t refcount_t;
+
 #ifdef __cplusplus
 #include <types/cplusplus.hpp>
 #endif

+ 0 - 53
src/asm/sys.s

@@ -1,53 +0,0 @@
-.text
-
-.global asm_switch_pd
-.type   asm_switch_pd @function
-asm_switch_pd:
-    mov 8(%rsp), %rax
-    shl $12, %rax
-    mov %rax, %cr3
-    ret
-
-.global current_pd
-.type   current_pd @function
-current_pd:
-    mov %cr3, %rax
-    ret
-
-.section .text.kinit
-
-.global asm_enable_paging
-.type   asm_enable_paging @function
-asm_enable_paging:
-    cli
-    // page directory address
-    mov 8(%rsp), %rax
-    mov %rax, %cr3
-
-    mov %cr0, %rax
-    // SET PE, WP, PG
-	mov $0x80010001, %rcx
-	or %rcx, %rax
-    mov %rax, %cr0
-
-    ret
-
-.global asm_load_gdt
-.type   asm_load_gdt @function
-asm_load_gdt:
-    ret
-# TODO: LONG MODE
-#     cli
-#     lea 14(%rsp), %rax
-#     lgdt (%rax)
-#     ljmp $0x08, $_asm_load_gdt_fin
-# _asm_load_gdt_fin:
-#     ret
-
-.global asm_load_tr
-.type   asm_load_tr @function
-asm_load_tr:
-    cli
-    mov 8(%rsp), %rax
-    ltr %ax
-    ret

+ 4 - 4
src/boot.s

@@ -50,8 +50,8 @@ _fill_loop1:
     adc $0, %edx
     loop _fill_loop1
 
-    # set PCD
-    or $0x00000010, %ebx
+    # set PCD, PWT
+    or $0x00000018, %ebx
     mov $256, %ecx
     xor %esi, %esi
 _fill_loop2:
@@ -66,8 +66,8 @@ _fill_loop2:
     # PML4E 0xff8
     mov %edi, %esi # 0x102000
     mov $0x100ff8, %edi
-    # clear PCD, PS
-    and $(~0x00000090), %ebx
+    # clear PCD, PWT, PS
+    and $(~0x00000098), %ebx
     call fill_pxe
 
     # PDPTE 0xff8

+ 3 - 5
src/fs/fat.cpp

@@ -7,10 +7,8 @@
 #include <stdio.h>
 
 #include <types/allocator.hpp>
-#include <types/status.h>
 
 #include <fs/fat.hpp>
-#include <kernel/mem.h>
 #include <kernel/mm.hpp>
 #include <kernel/module.hpp>
 #include <kernel/vfs.hpp>
@@ -142,7 +140,7 @@ int fat32::readdir(fs::inode* dir, size_t offset, const fs::vfs::filldir_func& f
             }
             auto ret = filldir(fname.c_str(), 0, ind, ind->mode & S_IFMT);
 
-            if (ret != GB_OK) {
+            if (ret != 0) {
                 release_cluster(next);
                 return nread;
             }
@@ -304,7 +302,7 @@ int fat32::inode_statx(dentry* ent, statx* st, unsigned int mask)
         st->stx_mask |= STATX_GID;
     }
 
-    return GB_OK;
+    return 0;
 }
 
 int fat32::inode_stat(dentry* dent, struct stat* st)
@@ -319,7 +317,7 @@ int fat32::inode_stat(dentry* dent, struct stat* st)
     st->st_blksize = 4096;
     st->st_blocks = (ind->size + 511) / 512;
     st->st_ino = ind->ino;
-    return GB_OK;
+    return 0;
 }
 
 static fat32* create_fat32(const char* source, unsigned long, const void*)

+ 1 - 3
src/fs/procfs.cc

@@ -5,8 +5,6 @@
 #include <sys/mount.h>
 #include <unistd.h>
 
-#include <types/status.h>
-
 #include <kernel/module.hpp>
 #include <kernel/vfs.hpp>
 #include <kernel/vfs/vfs.hpp>
@@ -160,7 +158,7 @@ public:
         for (const auto& [ ino, pf ] : files) {
             auto* ind = get_inode(ino);
             int ret = callback(pf.name.c_str(), 0, ind, ind->mode);
-            if (ret != GB_OK)
+            if (ret != 0)
                 return -EIO;
             ++nread;
         }

+ 4 - 3
src/kernel.ld

@@ -5,7 +5,8 @@ MEMORY
     MBR    (wx) : org = 0x0e00, l = 512
     STAGE1 (wx) : org = 0x1000, l = 4K
     PHYMEM (w)  : org = 0xffffff0000000000, len = 512 * 1024M
-    KBSS   (w)  : org = 0xffffffffff800000, len = 2M
+    PARRAY (w)  : org = 0xffffff8000000000, len = 128 * 1024M
+    KBSS   (w)  : org = 0xffffffffc0200000, len = 2M
     KIMAGE (wx) : org = 0xffffffffffc00000, len = 2M
 }
 
@@ -84,9 +85,9 @@ SECTIONS
 
         . = ALIGN(16);
 
-        bss_addr = .;
+        BSS_ADDR = .;
         QUAD(ABSOLUTE(BSS_START));
-        bss_len = .;
+        BSS_LENGTH = .;
         QUAD(BSS_END - BSS_START);
 
         . = ALIGN(0x1000);

+ 0 - 19
src/kernel/allocator.cc

@@ -156,29 +156,10 @@ void brk_memory_allocator::deallocate(void* ptr)
     unite_afterwards(blk);
 }
 
-static std::byte ki_heap[0x100000];
-static brk_memory_allocator ki_alloc(ki_heap, sizeof(ki_heap));
 static brk_memory_allocator* k_alloc;
 
-void* kimalloc(std::size_t size)
-{
-    return ki_alloc.allocate(size);
-}
-
-void kifree(void* ptr)
-{
-    ki_alloc.deallocate(ptr);
-}
-
 } // namespace types::memory
 
-SECTION(".text.kinit")
-void kernel::kinit::init_kernel_heap(void *start, std::size_t size)
-{
-    using namespace types::memory;
-    k_alloc = kinew<brk_memory_allocator>((std::byte*)start, size);
-}
-
 void* operator new(size_t sz)
 {
     void* ptr = types::memory::k_alloc->allocate(sz);

+ 20 - 35
src/kernel/hw/ahci.cc

@@ -1,3 +1,4 @@
+#include "kernel/mem/phys.hpp"
 #include <vector>
 #include <cstddef>
 #include <algorithm>
@@ -9,8 +10,6 @@
 #include <kernel/hw/pci.hpp>
 #include <kernel/irq.hpp>
 
-#include <types/size.h>
-
 #include <stdint.h>
 #include <errno.h>
 
@@ -22,6 +21,8 @@
 using namespace kernel::module;
 using namespace kernel::hw::pci;
 
+using kernel::mem::physaddr;
+
 constexpr uint32_t MAX_SPINS = 100000;
 
 constexpr uint16_t VENDOR_INTEL = 0x8086;
@@ -40,11 +41,8 @@ constexpr uint32_t PORT_CMD_CR = 0x00008000;
 namespace ahci {
 
 typedef volatile struct hba_port_t {
-    uint32_t command_list_base;
-    uint32_t command_list_base_upper;
-
-    uint32_t fis_base;
-    uint32_t fis_base_upper;
+    uint64_t command_list_base;
+    uint64_t fis_base;
 
     uint32_t interrupt_status;
     uint32_t interrupt_enable;
@@ -102,8 +100,7 @@ struct command_header {
 
     uint32_t volatile bytes_transferred;
 
-    uint32_t command_table_base;
-    uint32_t command_table_base_upper;
+    uint64_t command_table_base;
 
     uint32_t reserved1[4];
 };
@@ -220,8 +217,7 @@ struct received_fis {
 };
 
 struct prdt_entry {
-    uint32_t data_base;
-    uint32_t data_base_upper;
+    uint64_t data_base;
 
     uint32_t reserved0;
 
@@ -291,9 +287,8 @@ struct quick_queue {
 struct ahci_port {
 private:
     // quick_queue<32> qu;
-    page_t page;
+    physaddr<command_header, false> cmd_header;
     hba_port* port;
-    command_header* cmd_header { };
     received_fis* fis { };
     std::size_t sectors { -1U };
 
@@ -309,7 +304,7 @@ private:
 
         // for now, we read 3.5KB at most at a time
         // command fis and prdt will take up the lower 128+Bytes
-        auto cmdtable_page = __alloc_raw_page();
+        physaddr<command_table> cmdtable{nullptr}; // TODO: LONG MODE allocate a page
 
         // construct command header
         memset(cmd_header + n, 0x00, sizeof(command_header));
@@ -318,9 +313,8 @@ private:
 
         cmd_header[n].write = write;
         cmd_header[n].prdt_length = 1;
-        cmd_header[n].command_table_base = cmdtable_page << 12;
+        cmd_header[n].command_table_base = cmdtable.phys();
 
-        auto* cmdtable = (command_table*)kernel::pmap(cmdtable_page);
         memset(cmdtable, 0x00, sizeof(command_table) + sizeof(prdt_entry));
 
         // first, set up command fis
@@ -340,7 +334,7 @@ private:
 
         // fill in prdt
         auto* pprdt = cmdtable->prdt;
-        pprdt->data_base = (cmdtable_page << 12) + 512;
+        pprdt->data_base = cmdtable.phys() + 512;
         pprdt->byte_count = count;
         pprdt->interrupt = 1;
 
@@ -359,10 +353,9 @@ private:
         SPIN(port->command_issue & (1 << n), spins)
             return -1;
 
-        memcpy(buf, (char*)cmdtable + 512, count);
+        memcpy(buf, cmdtable.cast_to<char*>() + 512, count);
 
-        kernel::pfree(cmdtable_page);
-        __free_raw_page(cmdtable_page);
+        // TODO: free cmdtable
         return 0;
     }
 
@@ -377,14 +370,14 @@ private:
 
 public:
     explicit ahci_port(hba_port* port)
-        : page(__alloc_raw_page()), port(port) { }
+        // TODO: LONG MODE
+        : cmd_header{nullptr}, port(port) { }
 
     ~ahci_port()
     {
         if (!cmd_header)
             return;
-        kernel::pfree(page);
-        __free_raw_page(page);
+        // TODO: free cmd_header
     }
 
     ssize_t read(char* buf, std::size_t buf_size, std::size_t offset, std::size_t cnt)
@@ -425,13 +418,9 @@ public:
         //
         // port->interrupt_enable = 1;
 
-        port->command_list_base = page << 12;
-        port->command_list_base_upper = 0;
+        port->command_list_base = cmd_header.phys();
+        port->fis_base = cmd_header.phys() + 0x400;
 
-        port->fis_base = (page << 12) + 0x400;
-        port->fis_base_upper = 0;
-
-        cmd_header = (command_header*)kernel::pmap(page, false);
         fis = (received_fis*)(cmd_header + 1);
 
         if (start_command(port) != 0)
@@ -455,9 +444,6 @@ public:
     ~ahci_module()
     {
         // TODO: release PCI device
-        if (ghc)
-            kernel::pfree(dev->reg[PCI_REG_ABAR] >> 12);
-
         for (auto& item : ports) {
             if (!item)
                 continue;
@@ -506,10 +492,9 @@ public:
         auto ret = kernel::hw::pci::register_driver(VENDOR_INTEL, DEVICE_AHCI,
             [this](pci_device* dev) -> int {
                 this->dev = dev;
-                uint32_t abar_address = dev->reg[PCI_REG_ABAR];
 
-                void* base = kernel::pmap(abar_address >> 12, false);
-                this->ghc = (hba_ghc*)base;
+                physaddr<hba_ghc, false> pp_base{dev->reg[PCI_REG_ABAR]};
+                this->ghc = pp_base;
 
                 this->ghc->global_host_control =
                     this->ghc->global_host_control | 2; // set interrupt enable

+ 6 - 6
src/kernel/hw/serial.cpp

@@ -1,9 +1,10 @@
+#include <errno.h>
+#include <stdio.h>
+
 #include <asm/port_io.h>
 #include <kernel/hw/serial.h>
 #include <kernel/irq.hpp>
 #include <kernel/tty.hpp>
-#include <stdio.h>
-#include <types/status.h>
 
 static void serial_receive_data_interrupt(void)
 {
@@ -31,9 +32,8 @@ int32_t init_serial_port(port_id_t port)
     asm_outb(port + 0, 0xAE); // Test serial chip (send byte 0xAE and check if serial returns same byte)
 
     // Check if serial is faulty (i.e: not same byte as sent)
-    if (asm_inb(port + 0) != 0xAE) {
-        return GB_FAILED;
-    }
+    if (asm_inb(port + 0) != 0xAE)
+        return -EIO;
 
     // If serial is not faulty set it in normal operation mode
     // (not-loopback with IRQs enabled and OUT#1 and OUT#2 bits enabled)
@@ -43,7 +43,7 @@ int32_t init_serial_port(port_id_t port)
 
     kernel::irq::register_handler(4, serial_receive_data_interrupt);
 
-    return GB_OK;
+    return 0;
 }
 
 int32_t is_serial_has_data(port_id_t port)

+ 80 - 77
src/kernel/interrupt.cpp

@@ -5,7 +5,6 @@
 #include <stdint.h>
 #include <stdio.h>
 
-#include <types/size.h>
 #include <types/types.h>
 
 #include <asm/port_io.h>
@@ -15,7 +14,6 @@
 #include <kernel/interrupt.h>
 #include <kernel/irq.hpp>
 #include <kernel/log.hpp>
-#include <kernel/mem.h>
 #include <kernel/mm.hpp>
 #include <kernel/process.hpp>
 #include <kernel/vfs.hpp>
@@ -40,12 +38,12 @@ extern "C" void int13(); extern "C" void int14();
 extern "C" void syscall_stub();
 
 #define SET_UP_IRQ(N, SELECTOR)                   \
-    ptr_t addr_irq##N = (ptr_t)irq##N;            \
+    uintptr_t addr_irq##N = (uintptr_t)irq##N;            \
     set_idt_entry(IDT, 0x20 + (N), (addr_irq##N), \
         (SELECTOR), KERNEL_INTERRUPT_GATE_TYPE);
 
 #define SET_IDT_ENTRY_FN(N, FUNC_NAME, SELECTOR, TYPE) \
-    ptr_t addr_##FUNC_NAME = (ptr_t)FUNC_NAME;         \
+    uintptr_t addr_##FUNC_NAME = (uintptr_t)FUNC_NAME;         \
     set_idt_entry(IDT, (N), (addr_##FUNC_NAME), (SELECTOR), (TYPE));
 
 SECTION(".text.kinit")
@@ -84,8 +82,6 @@ static inline void NORETURN die(regs_64& regs, void* rip)
 SECTION(".text.kinit")
 void init_idt()
 {
-    asm_cli();
-
     memset(IDT, 0x00, sizeof(IDT));
 
     // invalid opcode
@@ -99,11 +95,16 @@ void init_idt()
     // system call
     SET_IDT_ENTRY_FN(0x80, syscall_stub, 0x08, USER_INTERRUPT_GATE_TYPE);
 
-    uint16_t idt_descriptor[3];
-    idt_descriptor[0] = sizeof(struct IDT_entry) * 256;
-    *((uint32_t*)(idt_descriptor + 1)) = (ptr_t)IDT;
+    uint64_t idt_descriptor[2];
+    idt_descriptor[0] = (sizeof(IDT_entry) * 256) << 48;
+    idt_descriptor[1] = (uintptr_t)IDT;
 
-    asm_load_idt(idt_descriptor, 0);
+    asm volatile(
+            "lidt (%0)"
+            :
+            : "r"((uintptr_t)idt_descriptor + 6)
+            :
+            );
 }
 
 using kernel::irq::irq_handler_t;
@@ -221,11 +222,11 @@ static inline void NORETURN _int14_kill_user(void)
 // page fault
 extern "C" void int14_handler(int14_data* d)
 {
-    kernel::memory::mm_list* mms = nullptr;
+    kernel::mem::mm_list* mms = nullptr;
     if (current_process) [[likely]]
         mms = &current_process->mms;
     else
-        mms = kernel::memory::mm_list::s_kernel_mms;
+        mms = kernel::mem::mm_list::s_kernel_mms;
 
     auto* mm_area = mms->find(d->l_addr);
     if (!mm_area) [[unlikely]] {
@@ -239,69 +240,70 @@ extern "C" void int14_handler(int14_data* d)
     if (d->error_code.user && mm_area->attr.system)
         _int14_kill_user();
 
-    page* page = &(*mm_area->pgs)[vptrdiff(d->l_addr, mm_area->start) / PAGE_SIZE];
-    kernel::paccess pa(page->pg_pteidx >> 12);
-    auto pt = (pt_t)pa.ptr();
-    assert(pt);
-    pte_t* pte = *pt + (page->pg_pteidx & 0xfff);
-
-    if (unlikely(d->error_code.present == 0 && !mm_area->mapped_file))
-        _int14_panic(d->v_eip, d->l_addr, d->error_code);
-
-    if (page->attr & PAGE_COW) {
-        // if it is a dying page
-        if (*page->ref_count == 1) {
-            page->attr &= ~PAGE_COW;
-            pte->in.p = 1;
-            pte->in.a = 0;
-            pte->in.rw = mm_area->attr.write;
-            return;
-        }
-        // duplicate the page
-        page_t new_page = __alloc_raw_page();
-
-        {
-            kernel::paccess pdst(new_page), psrc(page->phys_page_id);
-            auto* new_page_data = (char*)pdst.ptr();
-            auto* src = psrc.ptr();
-            assert(new_page_data && src);
-            memcpy(new_page_data, src, PAGE_SIZE);
-        }
-
-        pte->in.page = new_page;
-        pte->in.rw = mm_area->attr.write;
-        pte->in.a = 0;
-
-        --*page->ref_count;
-
-        page->ref_count = types::memory::kinew<size_t>(1);
-        page->attr &= ~PAGE_COW;
-        page->phys_page_id = new_page;
-    }
-
-    if (page->attr & PAGE_MMAP) {
-        pte->in.p = 1;
-
-        size_t offset = align_down<12>((std::size_t)d->l_addr);
-        offset -= (std::size_t)mm_area->start;
-
-        kernel::paccess pa(page->phys_page_id);
-        auto* data = (char*)pa.ptr();
-        assert(data);
-
-        int n = vfs_read(
-            mm_area->mapped_file,
-            data,
-            PAGE_SIZE,
-            mm_area->file_offset + offset,
-            PAGE_SIZE);
-
-        // TODO: send SIGBUS if offset is greater than real size
-        if (n != PAGE_SIZE)
-            memset(data + n, 0x00, PAGE_SIZE - n);
-
-        page->attr &= ~PAGE_MMAP;
-    }
+    // TODO: LONG MODE
+    // kernel::mem::paging::page* page = &(*mm_area->pgs)[vptrdiff(d->l_addr, mm_area->start) >> 12];
+    // kernel::paccess pa(page->pg_pteidx >> 12);
+    // auto pt = (pt_t)pa.ptr();
+    // assert(pt);
+    // pte_t* pte = *pt + (page->pg_pteidx & 0xfff);
+
+    // if (unlikely(d->error_code.present == 0 && !mm_area->mapped_file))
+    //     _int14_panic(d->v_eip, d->l_addr, d->error_code);
+
+    // if (page->attr & PAGE_COW) {
+    //     // if it is a dying page
+    //     if (*page->ref_count == 1) {
+    //         page->attr &= ~PAGE_COW;
+    //         pte->in.p = 1;
+    //         pte->in.a = 0;
+    //         pte->in.rw = mm_area->attr.write;
+    //         return;
+    //     }
+    //     // duplicate the page
+    //     page_t new_page = __alloc_raw_page();
+
+    //     {
+    //         kernel::paccess pdst(new_page), psrc(page->phys_page_id);
+    //         auto* new_page_data = (char*)pdst.ptr();
+    //         auto* src = psrc.ptr();
+    //         assert(new_page_data && src);
+    //         memcpy(new_page_data, src, PAGE_SIZE);
+    //     }
+
+    //     pte->in.page = new_page;
+    //     pte->in.rw = mm_area->attr.write;
+    //     pte->in.a = 0;
+
+    //     --*page->ref_count;
+
+    //     page->ref_count = types::memory::kinew<size_t>(1);
+    //     page->attr &= ~PAGE_COW;
+    //     page->phys_page_id = new_page;
+    // }
+
+    // if (page->attr & PAGE_MMAP) {
+    //     pte->in.p = 1;
+
+    //     size_t offset = align_down<12>((std::size_t)d->l_addr);
+    //     offset -= (std::size_t)mm_area->start;
+
+    //     kernel::paccess pa(page->phys_page_id);
+    //     auto* data = (char*)pa.ptr();
+    //     assert(data);
+
+    //     int n = vfs_read(
+    //         mm_area->mapped_file,
+    //         data,
+    //         PAGE_SIZE,
+    //         mm_area->file_offset + offset,
+    //         PAGE_SIZE);
+
+    //     // TODO: send SIGBUS if offset is greater than real size
+    //     if (n != PAGE_SIZE)
+    //         memset(data + n, 0x00, PAGE_SIZE - n);
+
+    //     page->attr &= ~PAGE_MMAP;
+    // }
 }
 
 extern "C" void irq_handler(
@@ -316,8 +318,9 @@ extern "C" void irq_handler(
     for (const auto& handler : s_irq_handlers[irqno])
         handler();
 
-    if (context->cs != USER_CODE_SEGMENT)
-        return;
+    // TODO: LONG MODE
+    // if (context->cs != USER_CODE_SEGMENT)
+    //     return;
 
     if (current_thread->signals.pending_signal())
         current_thread->signals.handle(context, mmxregs);

+ 156 - 395
src/kernel/mem.cpp

@@ -1,178 +1,48 @@
 #include <cstddef>
 
-#include <asm/port_io.h>
-#include <asm/sys.h>
 #include <assert.h>
 #include <errno.h>
-#include <kernel/mem.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <asm/port_io.h>
+#include <kernel/mem/paging.hpp>
 #include <kernel/mm.hpp>
 #include <kernel/process.hpp>
 #include <kernel/task.h>
 #include <kernel/vga.hpp>
-#include <stdint.h>
-#include <stdio.h>
-#include <types/allocator.hpp>
-#include <types/bitmap.hpp>
-#include <types/size.h>
-#include <types/status.h>
-
-// constant values
-
-#define EMPTY_PAGE ((page_t)0)
-
-// ---------------------
-
-static size_t mem_size;
-static uint8_t _mem_bitmap[1024 * 1024 / 8];
-static types::bitmap mem_bitmap(
-    [](unsigned char*, std::size_t){}, _mem_bitmap,
-    1024 * 1024);
-
-// global
-segment_descriptor gdt[7];
-
-uint8_t e820_mem_map[1024];
-uint32_t e820_mem_map_count;
-uint32_t e820_mem_map_entry_size;
-struct mem_size_info mem_size_info;
-
-constexpr void mark_addr_len(pptr_t start, size_t n)
-{
-    if (n == 0)
-        return;
-    page_t start_page = align_down<12>(start) >> 12;
-    page_t end_page = align_up<12>(start + n) >> 12;
-    for (page_t i = start_page; i < end_page; ++i)
-        mem_bitmap.set(i);
-}
-
-constexpr void free_addr_len(pptr_t start, size_t n)
-{
-    if (n == 0)
-        return;
-    page_t start_page = align_down<12>(start) >> 12;
-    page_t end_page = align_up<12>(start + n) >> 12;
-    for (page_t i = start_page; i < end_page; ++i)
-        mem_bitmap.clear(i);
-}
-
-constexpr void mark_addr_range(pptr_t start, pptr_t end)
-{
-    mark_addr_len(start, end - start);
-}
-
-constexpr void free_addr_range(pptr_t start, pptr_t end)
-{
-    free_addr_len(start, end - start);
-}
-
-page_t __alloc_raw_page(void)
-{
-    const auto size = mem_bitmap.size();
-    for (size_t i = 0; i < size; ++i) {
-        if (mem_bitmap.test(i) == 0) {
-            mem_bitmap.set(i);
-            return i;
-        }
-    }
-    return -1;
-}
-
-void __free_raw_page(page_t pg)
-{
-    mem_bitmap.clear(pg);
-}
 
-page allocate_page(void)
-{
-    return page {
-        .phys_page_id = __alloc_raw_page(),
-        .ref_count = types::memory::kinew<size_t>(0),
-        .pg_pteidx = 0,
-        .attr = 0,
-    };
-}
-
-void free_page(page* pg)
-{
-    if (*pg->ref_count == 1) {
-        types::memory::kidelete<size_t>(pg->ref_count);
-        __free_raw_page(pg->phys_page_id);
-    } else {
-        --*pg->ref_count;
-    }
-}
-
-void dealloc_pd(page_t pd)
-{
-    {
-        kernel::paccess pa(pd);
-        auto p_pd = (pd_t)pa.ptr();
-        assert(p_pd);
-        for (pde_t* ent = (*p_pd); ent < (*p_pd) + 768; ++ent) {
-            if (!ent->in.p)
-                continue;
-            __free_raw_page(ent->in.pt_page);
-        }
-    }
-    __free_raw_page(pd);
-}
+#include <types/allocator.hpp>
 
-SECTION(".text.kinit")
-static inline void init_mem_layout(void)
+void dealloc_pd(kernel::mem::paging::pfn_t pd)
 {
-    mem_size = 1024 * mem_size_info.n_1k_blks;
-    mem_size += 64 * 1024 * mem_size_info.n_64k_blks;
-
-    // mark empty page
-    mark_addr_range(0x00000000, 0x00001000);
-    // mark kernel page directory
-    mark_addr_range(0x00001000, 0x00002000);
-    // mark kernel page table
-    mark_addr_range(0x00002000, 0x00006000);
-    // mark kernel early stack
-    mark_addr_range(0x00006000, 0x00008000);
-    // mark EBDA and upper memory as allocated
-    mark_addr_range(0x80000, 0x100000);
-
-    constexpr pptr_t PHYS_BSS_START = 0x100000;
     // TODO: LONG MODE
-    // mark .stage1 and .kinit
-    // mark_addr_range((pptr_t)__stage1_start, (pptr_t)__kinit_end);
-    // mark kernel .text to .data
-    // mark_addr_len((pptr_t)__kinit_end, __data_end - __text_start);
-    // mark kernel .bss
-    mark_addr_len(PHYS_BSS_START, bss_len);
-
-    if (e820_mem_map_entry_size == 20) {
-        struct e820_mem_map_entry_20* entry = (struct e820_mem_map_entry_20*)e820_mem_map;
-        for (uint32_t i = 0; i < e820_mem_map_count; ++i, ++entry) {
-            if (entry->type != 1) {
-                mark_addr_len(entry->base, entry->len);
-            }
-        }
-    } else {
-        struct e820_mem_map_entry_24* entry = (struct e820_mem_map_entry_24*)e820_mem_map;
-        for (uint32_t i = 0; i < e820_mem_map_count; ++i, ++entry) {
-            if (entry->in.type != 1) {
-                mark_addr_len(entry->in.base, entry->in.len);
-            }
-        }
-    }
+    // {
+    //     kernel::paccess pa(pd);
+    //     auto p_pd = (pd_t)pa.ptr();
+    //     assert(p_pd);
+    //     for (pde_t* ent = (*p_pd); ent < (*p_pd) + 768; ++ent) {
+    //         if (!ent->in.p)
+    //             continue;
+    //         __free_raw_page(ent->in.pt_page);
+    //     }
+    // }
+    // __free_raw_page(pd);
 }
 
-using kernel::memory::mm_list;
-using kernel::memory::mm;
+using kernel::mem::mm_list;
+using kernel::mem::mm;
 
 mm_list::mm_list()
     : m_areas(s_kernel_mms->m_areas)
 {
-    m_pd = __alloc_raw_page();
-    kernel::paccess pdst(m_pd), psrc(s_kernel_mms->m_pd);
-    auto* dst = pdst.ptr();
-    auto* src = psrc.ptr();
-    assert(dst && src);
-    memcpy(dst, src, PAGE_SIZE);
+    // TODO: LONG MODE
+    // m_pd = __alloc_raw_page();
+    // kernel::paccess pdst(m_pd), psrc(s_kernel_mms->m_pd);
+    // auto* dst = pdst.ptr();
+    // auto* src = psrc.ptr();
+    // assert(dst && src);
+    // memcpy(dst, src, PAGE_SIZE);
 }
 
 mm_list::mm_list(const mm_list& other)
@@ -192,14 +62,15 @@ mm_list::mm_list(const mm_list& other)
             area.file_offset = src.file_offset;
         }
 
-        paccess pa(m_pd);
-        pd_t pd = (pd_t)pa.ptr();
+        // TODO: LONG MODE
+        // paccess pa(m_pd);
+        // pd_t pd = (pd_t)pa.ptr();
 
-        for (const auto& pg : *src.pgs) {
-            area.append_page(pd, pg,
-                    PAGE_COW | (pg.attr & PAGE_MMAP),
-                    src.attr.system);
-        }
+        // for (const auto& pg : *src.pgs) {
+        //     area.append_page(pd, pg,
+        //             PAGE_COW | (pg.attr & PAGE_MMAP),
+        //             src.attr.system);
+        // }
     }
 }
 
@@ -214,15 +85,16 @@ mm_list::~mm_list()
 
 void mm_list::switch_pd() const
 {
-    asm_switch_pd(m_pd);
+    // TODO: LONG MODE
+    // asm_switch_pd(m_pd);
 }
 
 int mm_list::register_brk(void* addr)
 {
     if (!is_avail(addr))
-        return GB_FAILED;
+        return -ENOMEM;
     m_brk = &addarea(addr, true, false);
-    return GB_OK;
+    return 0;
 }
 
 void* mm_list::set_brk(void* addr)
@@ -233,13 +105,14 @@ void* mm_list::set_brk(void* addr)
     if (addr <= curbrk || !is_avail(curbrk, vptrdiff(addr, curbrk)))
         return curbrk;
 
-    kernel::paccess pa(m_pd);
-    pd_t pd = (pd_t)pa.ptr();
+    // TODO: LONG MODE
+    // kernel::paccess pa(m_pd);
+    // pd_t pd = (pd_t)pa.ptr();
 
-    while (curbrk < addr) {
-        m_brk->append_page(pd, empty_page, PAGE_COW, false);
-        curbrk = (char*)curbrk + PAGE_SIZE;
-    }
+    // while (curbrk < addr) {
+    //     m_brk->append_page(pd, empty_page, PAGE_COW, false);
+    //     curbrk = (char*)curbrk + PAGE_SIZE;
+    // }
 
     return curbrk;
 }
@@ -272,12 +145,12 @@ void* mm_list::find_avail(void* hint, size_t len, bool priv) const
 // TODO: write dirty pages to file
 int mm_list::unmap(void* start, size_t len, bool system)
 {
-    ptr_t addr = (ptr_t)start;
+    uintptr_t addr = (uintptr_t)start;
     void* end = vptradd(start, align_up<12>(len));
 
     // standard says that addr and len MUST be
     // page-aligned or the call is invalid
-    if (addr % PAGE_SIZE != 0)
+    if ((addr & 0xfff) != 0)
         return -EINVAL;
 
     // if doing user mode unmapping, check area privilege
@@ -288,16 +161,17 @@ int mm_list::unmap(void* start, size_t len, bool system)
 
     auto iter = m_areas.lower_bound(start);
 
+    // TODO: LONG MODE
     for ( ; iter != m_areas.end() && *iter < end; ) {
         if (!(start < *iter) && start != iter->start) {
             mm newmm = iter->split(start);
-            unmap(newmm);
+            // unmap(newmm);
             ++iter;
             continue;
         }
         else if (!(*iter < end)) {
             mm newmm = iter->split(end);
-            unmap(*iter);
+            // unmap(*iter);
             m_areas.erase(iter);
 
             bool inserted;
@@ -306,121 +180,125 @@ int mm_list::unmap(void* start, size_t len, bool system)
             break;
         }
         else {
-            unmap(*iter);
+            // unmap(*iter);
             iter = m_areas.erase(iter);
         }
     }
 
-    return GB_OK;
+    return 0;
 }
 
 mm& mm_list::add_empty_area(void *start, std::size_t page_count,
     uint32_t page_attr, bool w, bool system)
 {
-    auto& area = addarea(start, w, system);
-    kernel::paccess pa(m_pd);
-    pd_t pd = (pd_t)pa.ptr();
+    // TODO: LONG MODE
+    // auto& area = addarea(start, w, system);
+    // kernel::paccess pa(m_pd);
+    // pd_t pd = (pd_t)pa.ptr();
 
-    while (page_count--)
-        area.append_page(pd, empty_page, page_attr, system);
+    // while (page_count--)
+    //     area.append_page(pd, empty_page, page_attr, system);
 
-    return area;
+    // return area;
 }
 
-constexpr void map_raw_page_to_pte(
-    pte_t* pte, page_t page,
-    bool present, bool write, bool priv)
-{
-    // set P bit
-    pte->v = 0;
-    pte->in.p = present;
-    pte->in.rw = write;
-    pte->in.us = !priv;
-    pte->in.page = page;
-}
-
-void mm::append_page(pd_t pd, const page& pg, uint32_t attr, bool priv)
-{
-    assert(pd);
-
-    void* addr = this->end();
-    pde_t* pde = *pd + v_to_pdi(addr);
-
-    page_t pt_pg = 0;
-    pte_t* pte = nullptr;
-    // page table not exist
-    if (!pde->in.p) [[unlikely]] {
-        // allocate a page for the page table
-        pt_pg = __alloc_raw_page();
-        pde->in.p = 1;
-        pde->in.rw = 1;
-        pde->in.us = 1;
-        pde->in.pt_page = pt_pg;
-
-        auto pt = (pt_t)kernel::pmap(pt_pg);
-        assert(pt);
-        pte = *pt;
-
-        memset(pt, 0x00, PAGE_SIZE);
-    } else {
-        pt_pg = pde->in.pt_page;
-        auto pt = (pt_t)kernel::pmap(pt_pg);
-        assert(pt);
-        pte = *pt;
-    }
-
-    // map the page in the page table
-    int pti = v_to_pti(addr);
-    pte += pti;
-
-    map_raw_page_to_pte(
-        pte,
-        pg.phys_page_id,
-        !(attr & PAGE_MMAP),
-        false,
-        priv);
-
-    kernel::pfree(pt_pg);
-
-    if (unlikely((attr & PAGE_COW) && !(pg.attr & PAGE_COW))) {
-        kernel::paccess pa(pg.pg_pteidx >> 12);
-        auto* pg_pte = (pte_t*)pa.ptr();
-        assert(pg_pte);
-        pg_pte += (pg.pg_pteidx & 0xfff);
-        pg.attr |= PAGE_COW;
-        pg_pte->in.rw = 0;
-        pg_pte->in.a = 0;
-        invalidate_tlb(addr);
-    }
-
-    ++*pg.ref_count;
-
-    this->pgs->emplace_back(pg);
-    auto& emplaced = this->pgs->back();
-    emplaced.pg_pteidx = (pt_pg << 12) + pti;
-    emplaced.attr = attr;
-}
+// TODO: LONG MODE
+// constexpr void map_raw_page_to_pte(
+//     pte_t* pte, kernel::mem::paging::pfn_t page,
+//     bool present, bool write, bool priv)
+// {
+//     // set P bit
+//     pte->v = 0;
+//     pte->in.p = present;
+//     pte->in.rw = write;
+//     pte->in.us = !priv;
+//     pte->in.page = page;
+// }
+
+// TODO: LONG MODE
+// void mm::append_page(pd_t pd, const page& pg, uint32_t attr, bool priv)
+// {
+//     assert(pd);
+// 
+//     void* addr = this->end();
+//     pde_t* pde = *pd + v_to_pdi(addr);
+// 
+//     kernel::mem::paging::pfn_t pt_pg = 0;
+//     pte_t* pte = nullptr;
+//     // page table not exist
+//     if (!pde->in.p) [[unlikely]] {
+//         // allocate a page for the page table
+//         pt_pg = __alloc_raw_page();
+//         pde->in.p = 1;
+//         pde->in.rw = 1;
+//         pde->in.us = 1;
+//         pde->in.pt_page = pt_pg;
+// 
+//         auto pt = (pt_t)kernel::pmap(pt_pg);
+//         assert(pt);
+//         pte = *pt;
+// 
+//         memset(pt, 0x00, PAGE_SIZE);
+//     } else {
+//         pt_pg = pde->in.pt_page;
+//         auto pt = (pt_t)kernel::pmap(pt_pg);
+//         assert(pt);
+//         pte = *pt;
+//     }
+// 
+//     // map the page in the page table
+//     int pti = v_to_pti(addr);
+//     pte += pti;
+// 
+//     map_raw_kernel::mem::paging::pfn_to_pte(
+//         pte,
+//         pg.phys_page_id,
+//         !(attr & PAGE_MMAP),
+//         false,
+//         priv);
+// 
+//     kernel::pfree(pt_pg);
+// 
+//     if (unlikely((attr & PAGE_COW) && !(pg.attr & PAGE_COW))) {
+//         kernel::paccess pa(pg.pg_pteidx >> 12);
+//         auto* pg_pte = (pte_t*)pa.ptr();
+//         assert(pg_pte);
+//         pg_pte += (pg.pg_pteidx & 0xfff);
+//         pg.attr |= PAGE_COW;
+//         pg_pte->in.rw = 0;
+//         pg_pte->in.a = 0;
+//         invalidate_tlb(addr);
+//     }
+// 
+//     ++*pg.ref_count;
+// 
+//     this->pgs->emplace_back(pg);
+//     auto& emplaced = this->pgs->back();
+//     emplaced.pg_pteidx = (pt_pg << 12) + pti;
+//     emplaced.attr = attr;
+// }
 
 mm mm::split(void *addr)
 {
     assert(addr > start && addr < end());
-    assert((ptr_t)addr % PAGE_SIZE == 0);
+    assert((uintptr_t)addr % 4096 == 0);
 
-    size_t this_count = vptrdiff(addr, start) / PAGE_SIZE;
-    size_t new_count = pgs->size() - this_count;
+    size_t this_count = vptrdiff(addr, start) / 4096;
+    size_t new_count = page_count - this_count;
 
     mm newmm {
         .start = addr,
         .attr { attr },
-        .pgs = types::memory::kinew<mm::pages_vector>(),
         .mapped_file = mapped_file,
-        .file_offset = attr.mapped ? file_offset + this_count * PAGE_SIZE : 0,
+        .file_offset = attr.mapped ? file_offset + this_count * 4096 : 0,
+        .page_count = 0,
     };
 
-    for (size_t i = 0; i < new_count; ++i) {
-        newmm.pgs->emplace_back(pgs->back());
-        pgs->pop_back();
-    }
+    // TODO:
+    // for (size_t i = 0; i < new_count; ++i) {
+    //     newmm.pgs->emplace_back(pgs->back());
+    //     pgs->pop_back();
+    // }
 
     return newmm;
 }
@@ -435,25 +313,24 @@ int mmap(
 {
     auto& mms = current_process->mms;
 
-    if (file && !S_ISREG(file->mode) && !S_ISBLK(file->mode)) [[unlikely]] {
-        errno = EINVAL;
-        return GB_FAILED;
-    }
+    if (file && !S_ISREG(file->mode) && !S_ISBLK(file->mode)) [[unlikely]]
+        return -EINVAL;
 
     // TODO: find another address
-    assert(((ptr_t)hint & 0xfff) == 0);
+    assert(((uintptr_t)hint & 0xfff) == 0);
     // TODO: return failed
     assert((offset & 0xfff) == 0);
 
     size_t n_pgs = align_up<12>(len) >> 12;
 
-    if (!mms.is_avail(hint, len)) {
-        errno = EEXIST;
-        return GB_FAILED;
-    }
+    if (!mms.is_avail(hint, len))
+        return -EEXIST;
+
+    // TODO: LONG MODE
+    using namespace kernel::mem::paging;
 
     if (file) {
-        auto& mm = mms.add_empty_area(hint, n_pgs, PAGE_MMAP | PAGE_COW, write, priv);
+        auto& mm = mms.add_empty_area(hint, n_pgs, PA_MMAP | PA_COW, write, priv);
 
         mm.attr.mapped = 1;
         mm.mapped_file = file;
@@ -461,126 +338,10 @@ int mmap(
     }
     else {
         // private mapping of zero-filled pages
-        auto& mm = mms.add_empty_area(hint, n_pgs, PAGE_COW, write, priv);
+        auto& mm = mms.add_empty_area(hint, n_pgs, PA_COW, write, priv);
 
         mm.attr.mapped = 0;
     }
 
-    return GB_OK;
-}
-
-SECTION(".text.kinit")
-void init_mem(void)
-{
-    init_mem_layout();
-
-    // TODO: replace early kernel pd
-    auto* __kernel_mms = types::memory::kinew<kernel::memory::mm_list>(EARLY_KERNEL_PD_PAGE);
-    kernel::memory::mm_list::s_kernel_mms = __kernel_mms;
-
-    // create empty_page struct
-    empty_page.attr = 0;
-    empty_page.phys_page_id = EMPTY_PAGE;
-    empty_page.ref_count = types::memory::kinew<size_t>(2);
-    empty_page.pg_pteidx = 0x00002000;
-
-    // 0xd0000000 to 0xd4000000 or 3.5GiB, size 64MiB
-    __kernel_mms->add_empty_area(KERNEL_HEAP_START,
-        64 * 1024 * 1024 / PAGE_SIZE, PAGE_COW, true, true);
-
-    kernel::kinit::init_kernel_heap(KERNEL_HEAP_START,
-        vptrdiff(KERNEL_HEAP_LIMIT, KERNEL_HEAP_START));
-}
-
-SECTION(".text.kinit")
-void create_segment_descriptor(
-    segment_descriptor* sd,
-    uint32_t base,
-    uint32_t limit,
-    uint32_t flags,
-    uint32_t access)
-{
-    sd->base_low = base & 0x0000ffff;
-    sd->base_mid = ((base & 0x00ff0000) >> 16);
-    sd->base_high = ((base & 0xff000000) >> 24);
-    sd->limit_low = limit & 0x0000ffff;
-    sd->limit_high = ((limit & 0x000f0000) >> 16);
-    sd->access = access;
-    sd->flags = flags;
-}
-
-namespace __physmapper {
-struct mapped_area {
-    size_t ref;
-    void* ptr;
-};
-
-static types::hash_map<page_t, mapped_area,
-    types::memory::ident_allocator<std::pair<page_t, mapped_area>>>
-    mapped;
-static uint8_t _freebm[0x400 / 8];
-static types::bitmap freebm(
-    [](unsigned char*, std::size_t){}, _freebm, 0x400);
-} // namespace __physmapper
-
-void* kernel::pmap(page_t pg, bool cached)
-{
-    return nullptr;
-    // TODO: LONG MODE
-    // auto* const pmap_pt = std::bit_cast<pte_t*>(0xff001000);
-    // auto* const mapped_start = std::bit_cast<void*>(0xff000000);
-
-    // auto iter = __physmapper::mapped.find(pg);
-    // if (iter) {
-    //     auto [ idx, area ] = *iter;
-    //     ++area.ref;
-    //     return area.ptr;
-    // }
-
-    // for (int i = 2; i < 0x400; ++i) {
-    //     if (__physmapper::freebm.test(i) == 0) {
-    //         auto* pte = pmap_pt + i;
-    //         if (cached)
-    //             pte->v = 0x3;
-    //         else
-    //             pte->v = 0x13;
-    //         pte->in.page = pg;
-
-    //         void* ptr = vptradd(mapped_start, 0x1000 * i);
-    //         invalidate_tlb(ptr);
-
-    //         __physmapper::freebm.set(i);
-    //         __physmapper::mapped.emplace(pg,
-    //             __physmapper::mapped_area { 1, ptr });
-    //         return ptr;
-    //     }
-    // }
-
-    // return nullptr;
-}
-void kernel::pfree(page_t pg)
-{
-    // TODO: LONG MODE
-    // auto* const pmap_pt = std::bit_cast<pte_t*>(0xff001000);
-    // auto* const mapped_start = std::bit_cast<void*>(0xff000000);
-
-    // auto iter = __physmapper::mapped.find(pg);
-    // if (!iter)
-    //     return;
-    // auto& [ ref, ptr ] = iter->second;
-
-    // if (ref > 1) {
-    //     --ref;
-    //     return;
-    // }
-
-    // int i = vptrdiff(ptr, mapped_start);
-    // i /= 0x1000;
-
-    // auto* pte = pmap_pt + i;
-    // pte->v = 0;
-    // invalidate_tlb(ptr);
-
-    // __physmapper::freebm.clear(i);
-    // __physmapper::mapped.remove(iter);
+    return 0;
 }

+ 122 - 0
src/kernel/mem/paging.cc

@@ -0,0 +1,122 @@
+#include <kernel/mem/paging.hpp>
+#include <kernel/mem/slab.hpp>
+
+using namespace kernel::mem::paging;
+
+// Per-order free lists of the buddy allocator: zones[k].next is the head of
+// a singly linked list (chained through page::next) of blocks of order k and
+// zones[k].count is the number of blocks recorded for that order.
+struct zone_info {
+    page* next;
+    std::size_t count;
+};
+
+static zone_info zones[52];
+
+// Index of the most significant set bit of x, i.e. floor(log2(x)).
+// Both x == 0 and x == 1 yield 0.
+constexpr int _msb(std::size_t x)
+{
+    int bit = 0;
+    for (x >>= 1; x != 0; x >>= 1)
+        ++bit;
+    return bit;
+}
+
+// Address of the buddy of the order-`order` block that starts at `pfn`.
+// `pfn` is a byte address here (page_to_pfn scales the page index by the
+// page size), so the two buddies differ in bit (order + 12).
+constexpr pfn_t buddy(pfn_t pfn, int order)
+{
+    // Shift in pfn_t width: a plain `1 << n` is an int shift and overflows
+    // once order + 12 reaches 31, while the zone table goes up to order 51.
+    return pfn ^ (static_cast<pfn_t>(1) << (order + 12));
+}
+
+// Address of the order-(order + 1) block that contains the order-`order`
+// block starting at `pfn`: the same address with bit (order + 12) cleared.
+constexpr pfn_t parent(pfn_t pfn, int order)
+{
+    // Build the mask in pfn_t width; `1 << n` as an int overflows for
+    // order + 12 >= 31.
+    return pfn & ~(static_cast<pfn_t>(1) << (order + 12));
+}
+
+// Register the block that starts at `pfn` as a free block of the given
+// order (order is the power of 2 of the block size in pages): mark its
+// first page PAGE_BUDDY and push it onto that order's free list.
+// Returns the page struct of the block's first page.
+page* _create_zone(pfn_t pfn, int order)
+{
+    auto& freelist = zones[order];
+    page* const head_page = pfn_to_page(pfn);
+
+    head_page->flags |= PAGE_BUDDY;
+
+    // push to the front of the list
+    head_page->next = freelist.next;
+    freelist.next = head_page;
+    ++freelist.count;
+
+    return head_page;
+}
+
+// Shrink the block headed by `zone` from `order` down to `target_order`.
+// At every step the buddy half of the block is handed to the free list one
+// order lower; the half that still starts at `zone` is kept by the caller.
+void _split_zone(page* zone, int order, int target_order)
+{
+    for (int cur = order - 1; cur >= target_order; --cur) {
+        const pfn_t base = page_to_pfn(zone);
+        _create_zone(buddy(base, cur), cur);
+    }
+}
+
+// Take a free block of at least `order` from the free lists, splitting a
+// larger block when no block of the exact order is available.
+// Returns the first page of the block, or nullptr when every list from
+// `order` upwards is empty.
+page* _alloc_zone(int order)
+{
+    for (int i = order; i < 52; ++i) {
+        if (zones[i].count == 0)
+            continue;
+
+        auto* zone = zones[i].next;
+        zones[i].next = zone->next;
+        // keep the counter in sync with the list: without this an emptied
+        // list still looks populated and the next pop dereferences nullptr
+        zones[i].count--;
+
+        // TODO: set free bitmap
+        zone->refcount++;
+
+        // give the unused part of a larger block back to the free lists
+        if (i > order)
+            _split_zone(zone, i, order);
+
+        return zone;
+    }
+
+    return nullptr;
+}
+
+// Hand the physical range [start, end) to the buddy allocator.
+// `start` is rounded up and `end` rounded down to a page boundary; the
+// remaining pages are carved into naturally aligned power-of-two blocks.
+// An empty range (after rounding) is ignored.
+void kernel::mem::paging::create_zone(uintptr_t start, uintptr_t end)
+{
+    // from here on both values are page indices
+    start += (4096 - 1);
+    start >>= 12;
+    end >>= 12;
+
+    // Greedy carving: at each step emit the largest block that both fits
+    // in what is left and is aligned at the current position. Unlike a
+    // walk towards 2^msb(end), this stays inside [start, end) for every
+    // range, including ones that do not straddle that boundary.
+    while (start < end) {
+        int order = _msb(end - start);
+        while (order > 0 && (start & ((uintptr_t { 1 } << order) - 1)))
+            order--;
+
+        _create_zone(start << 12, order);
+        start += uintptr_t { 1 } << order;
+    }
+}
+
+// Allocate a block of 2^order pages.
+// Returns the page struct of the first page, or nullptr if no block is free.
+page* kernel::mem::paging::alloc_pages(int order)
+{
+    if (auto* zone = _alloc_zone(order))
+        return zone;
+
+    // TODO: die
+    return nullptr;
+}
+
+// Allocate a single page, i.e. a block of order 0.
+page* kernel::mem::paging::alloc_page()
+{
+    constexpr int single_page_order = 0;
+    return alloc_pages(single_page_order);
+}
+
+// Convert a page struct to the value used as its pfn: the struct's index in
+// PAGE_ARRAY multiplied by the page size (0x1000).
+pfn_t kernel::mem::paging::page_to_pfn(page* _page)
+{
+    const auto index = _page - PAGE_ARRAY;
+    return static_cast<pfn_t>(index) * 0x1000;
+}
+
+// Inverse of page_to_pfn: divide by the page size (0x1000) and use the
+// result as an index into PAGE_ARRAY.
+page* kernel::mem::paging::pfn_to_page(pfn_t pfn)
+{
+    const auto index = pfn / 0x1000;
+    return &PAGE_ARRAY[index];
+}

+ 151 - 0
src/kernel/mem/slab.cc

@@ -0,0 +1,151 @@
+#include <cstddef>
+
+#include <assert.h>
+
+#include <kernel/mem/paging.hpp>
+#include <kernel/mem/slab.hpp>
+
+using namespace kernel::mem;
+
+constexpr std::size_t SLAB_PAGE_SIZE = 0x1000; // 4K
+
+// Push `node` onto the front of the doubly linked list rooted at `*head`.
+// node->prev is not written, so the caller passes a node whose prev is
+// already null.
+template <typename ListNode>
+void list_insert(ListNode** head, ListNode* node)
+{
+    ListNode* const old_first = *head;
+
+    node->next = old_first;
+    if (old_first != nullptr)
+        old_first->prev = node;
+
+    *head = node;
+}
+
+// Pop the first node of the doubly linked list rooted at `*head`.
+// Returns the detached node (next and prev cleared), or nullptr when the
+// list is empty.
+template <typename ListNode>
+ListNode* list_get(ListNode** head)
+{
+    ListNode* node = *head;
+    if (node) {
+        *head = node->next;
+        // the new first node must not keep pointing back at the node we
+        // just removed, or a later list_remove() on it would skip the
+        // head update and write through a stale pointer
+        if (*head)
+            (*head)->prev = nullptr;
+
+        node->next = nullptr;
+        node->prev = nullptr;
+    }
+    return node;
+}
+
+// Unlink `node` from the doubly linked list rooted at `*head` and clear its
+// links. `node` must currently be a member of that list.
+template <typename ListNode>
+void list_remove(ListNode** head, ListNode* node)
+{
+    ListNode* const before = node->prev;
+    ListNode* const after = node->next;
+
+    // bypass `node` on the forward chain, or move the head past it
+    if (before == nullptr)
+        *head = after;
+    else
+        before->next = after;
+
+    if (after != nullptr)
+        after->prev = before;
+
+    node->prev = nullptr;
+    node->next = nullptr;
+}
+
+// Offset of the first object inside a slab page: sizeof(slab_head) rounded
+// up with a (size - 1) mask, which is a multiple of `size` when `size` is a
+// power of two.
+std::ptrdiff_t _slab_data_start_offset(std::size_t size)
+{
+    const std::size_t mask = size - 1;
+    return (sizeof(slab_head) + mask) & ~mask;
+}
+
+// Number of objects of `size` bytes that fit in one slab page after the
+// header area.
+std::size_t _slab_max_count(std::size_t size)
+{
+    const std::size_t usable = SLAB_PAGE_SIZE - _slab_data_start_offset(size);
+    return usable / size;
+}
+
+// Pop one object off the slab's free list.
+// Each free object stores the pointer to the next free object in its first
+// bytes. Returns nullptr when the slab has no free object left.
+void* _slab_head_alloc(slab_head* slab)
+{
+    if (slab->free_count == 0)
+        return nullptr;
+
+    void* const obj = slab->free;
+    slab->free = *static_cast<void**>(obj);
+    --slab->free_count;
+
+    return obj;
+}
+
+// Turn the page at physical address `start` into a slab for objects of
+// `size` bytes: fill in the header at the start of the page and thread all
+// object slots onto the slab's free list. The `cache` field is left for the
+// caller to set.
+slab_head* _make_slab(uintptr_t start, std::size_t size)
+{
+    slab_head* slab = physaddr<slab_head>{start};
+
+    slab->obj_size = size;
+    slab->free_count = _slab_max_count(size);
+    slab->next = nullptr;
+    slab->prev = nullptr;
+
+    slab->free = physaddr<void>{start + _slab_data_start_offset(size)};
+
+    // each free slot stores the address of the next one; the last slot
+    // terminates the list
+    std::byte* ptr = (std::byte*)slab->free;
+    for (unsigned i = 0; i < slab->free_count; ++i) {
+        if (i == slab->free_count-1)
+            *(void**)ptr = nullptr;
+        else
+            *(void**)ptr = ptr + size;
+        // step to the next slot: advancing by a single byte would make the
+        // links overlap and corrupt the free list
+        ptr += size;
+    }
+
+    return slab;
+}
+
+// Allocate one object from `cache`.
+// Partially used slabs are preferred; otherwise an empty slab is taken,
+// creating one from a freshly allocated page if the cache has none.
+// Returns nullptr when no page can be allocated for a new slab.
+void* kernel::mem::slab_alloc(slab_cache* cache) {
+    slab_head* slab = cache->slabs_partial;
+    if (!slab) { // no partial slabs, try to get an empty slab
+        if (!cache->slabs_empty) { // no empty slabs, create a new one
+            auto* new_page = paging::alloc_page();
+            if (!new_page) // out of memory: do not build a slab from a null page
+                return nullptr;
+
+            slab_add_page(cache, paging::page_to_pfn(new_page));
+        }
+
+        slab = list_get(&cache->slabs_empty);
+
+        list_insert(&cache->slabs_partial, slab);
+    }
+
+    void* ptr = _slab_head_alloc(slab);
+
+    if (slab->free_count == 0) { // slab is full
+        list_remove(&cache->slabs_partial, slab);
+        list_insert(&cache->slabs_full, slab);
+    }
+
+    return ptr;
+}
+
+// Return an object obtained from slab_alloc() to its slab.
+// The slab header is found at the start of the page that contains `ptr`.
+// The slab is moved between the cache's lists when its state changes:
+// full -> partial on the first free, and -> empty once every object is
+// free again. Freeing nullptr is a no-op.
+void kernel::mem::slab_free(void* ptr) {
+    if (!ptr)
+        return;
+
+    slab_head* slab = (slab_head*)((uintptr_t)ptr & ~(SLAB_PAGE_SIZE-1));
+
+    // a slab with no free object sits on the full list, any other slab
+    // reaching this point sits on the partial list
+    const bool was_full = (slab->free_count == 0);
+
+    *(void**)ptr = slab->free;
+    slab->free = ptr;
+    slab->free_count++;
+
+    const bool now_empty =
+        (slab->free_count == _slab_max_count(slab->obj_size));
+
+    // partial slab that stays partial: nothing to relink
+    if (!was_full && !now_empty)
+        return;
+
+    auto* cache = slab->cache;
+
+    // pick the source list from the slab's previous state instead of
+    // comparing against the list heads, so slabs in the middle of a list
+    // are handled as well
+    list_remove(was_full ? &cache->slabs_full : &cache->slabs_partial, slab);
+    list_insert(now_empty ? &cache->slabs_empty : &cache->slabs_partial, slab);
+}
+
+// Build a slab for `cache` in the page identified by `pfn` and put it on
+// the cache's list of empty slabs.
+void kernel::mem::slab_add_page(slab_cache* cache, paging::pfn_t pfn) {
+    slab_head* const fresh = _make_slab(pfn, cache->obj_size);
+
+    fresh->cache = cache;
+    list_insert(&cache->slabs_empty, fresh);
+}
+
+// Initialise `cache` for objects of `obj_size` bytes; all three slab lists
+// start out empty.
+void kernel::mem::init_slab_cache(slab_cache* cache, std::size_t obj_size)
+{
+    cache->slabs_full = nullptr;
+    cache->slabs_partial = nullptr;
+    cache->slabs_empty = nullptr;
+
+    cache->obj_size = obj_size;
+}

+ 9 - 14
src/kernel/process.cpp

@@ -10,19 +10,14 @@
 #include <sys/wait.h>
 
 #include <types/allocator.hpp>
-#include <types/bitmap.hpp>
 #include <types/cplusplus.hpp>
 #include <types/elf.hpp>
-#include <types/size.h>
-#include <types/status.h>
 #include <types/types.h>
 
 #include <asm/port_io.h>
-#include <asm/sys.h>
 #include <kernel/async/lock.hpp>
 #include <kernel/interrupt.h>
 #include <kernel/log.hpp>
-#include <kernel/mem.h>
 #include <kernel/mm.hpp>
 #include <kernel/module.hpp>
 #include <kernel/process.hpp>
@@ -162,7 +157,7 @@ int filearr::open(const process &current,
             if (!parent)
                 return -EINVAL;
             int ret = fs::vfs_mkfile(parent, filename.c_str(), mode);
-            if (ret != GB_OK)
+            if (ret != 0)
                 return ret;
             dentry = fs::vfs_open(*current.root, filepath);
             assert(dentry);
@@ -393,16 +388,16 @@ void proclist::kill(pid_t pid, int exit_code)
 
 static void release_kinit()
 {
-    kernel::paccess pa(EARLY_KERNEL_PD_PAGE);
-    auto pd = (pd_t)pa.ptr();
-    assert(pd);
-    (*pd)[0].v = 0;
+    // TODO: LONG MODE
+    // kernel::paccess pa(EARLY_KERNEL_PD_PAGE);
+    // auto pd = (pd_t)pa.ptr();
+    // assert(pd);
+    // (*pd)[0].v = 0;
 
-    // free pt#0
-    __free_raw_page(0x00002);
+    // // free pt#0
+    // __free_raw_page(0x00002);
 
     // free .stage1 and .kinit
-    // TODO: LONG MODE
     // for (uint32_t i = ((uint32_t)__stage1_start >> 12);
     //         i < ((uint32_t)__kinit_end >> 12); ++i) {
     //     __free_raw_page(i);
@@ -471,7 +466,7 @@ void NORETURN _kernel_init(void)
     }
 
     int ret = types::elf::elf32_load(&d);
-    assert(ret == GB_OK);
+    assert(ret == 0);
 
     asm volatile(
         "mov $0x23, %%ax\n"

+ 41 - 38
src/kernel/task/thread.cc

@@ -50,23 +50,24 @@ thread::kernel_stack::kernel_stack()
         return;
     }
 
+    // TODO: LONG MODE
     // kernel stack pt is at page#0x00005
-    kernel::paccess pa(0x00005);
-    auto pt = (pt_t)pa.ptr();
-    assert(pt);
+    // kernel::paccess pa(0x00005);
+    // auto pt = (pt_t)pa.ptr();
+    // assert(pt);
 
-    int cnt = THREAD_KERNEL_STACK_SIZE / PAGE_SIZE;
-    pte_t* pte = *pt + allocated * cnt;
+    // int cnt = THREAD_KERNEL_STACK_SIZE / PAGE_SIZE;
+    // pte_t* pte = *pt + allocated * cnt;
 
-    for (int i = 0; i < cnt; ++i) {
-        pte[i].v = 0x3;
-        pte[i].in.page = __alloc_raw_page();
-    }
+    // for (int i = 0; i < cnt; ++i) {
+    //     pte[i].v = 0x3;
+    //     pte[i].in.page = __alloc_raw_page();
+    // }
 
-    stack_base = (std::byte*)(0xffc00000 + THREAD_KERNEL_STACK_SIZE * (allocated + 1));
-    esp = (uint32_t*)stack_base;
+    // stack_base = (std::byte*)(0xffc00000 + THREAD_KERNEL_STACK_SIZE * (allocated + 1));
+    // esp = (uint32_t*)stack_base;
 
-    ++allocated;
+    // ++allocated;
 }
 
 thread::kernel_stack::kernel_stack(const kernel_stack& other)
@@ -141,34 +142,36 @@ void thread::send_signal(signal_list::signo_type signal)
 
 int thread::set_thread_area(kernel::user::user_desc* ptr)
 {
-    if (ptr->read_exec_only && ptr->seg_not_present) {
-        void* dst = (void*)ptr->base_addr;
-        std::size_t len = ptr->limit;
-        if (len > 0 && dst)
-            memset(dst, 0x00, len);
-        return 0;
-    }
-
-    if (ptr->entry_number == -1U)
-        ptr->entry_number = 6;
-    else
-        return -1;
-
-    tls_desc.limit_low = ptr->limit & 0xFFFF;
-    tls_desc.base_low = ptr->base_addr & 0xFFFF;
-    tls_desc.base_mid = (ptr->base_addr >> 16) & 0xFF;
-    tls_desc.access = SD_TYPE_DATA_USER;
-    tls_desc.limit_high = (ptr->limit >> 16) & 0xF;
-    tls_desc.flags = (ptr->limit_in_pages << 3) | (ptr->seg_32bit << 2);
-    tls_desc.base_high = (ptr->base_addr >> 24) & 0xFF;
-
-    return 0;
+    // TODO: LONG MODE
+    // if (ptr->read_exec_only && ptr->seg_not_present) {
+    //     void* dst = (void*)ptr->base_addr;
+    //     std::size_t len = ptr->limit;
+    //     if (len > 0 && dst)
+    //         memset(dst, 0x00, len);
+    //     return 0;
+    // }
+
+    // if (ptr->entry_number == -1U)
+    //     ptr->entry_number = 6;
+    // else
+    //     return -1;
+
+    // tls_desc.limit_low = ptr->limit & 0xFFFF;
+    // tls_desc.base_low = ptr->base_addr & 0xFFFF;
+    // tls_desc.base_mid = (ptr->base_addr >> 16) & 0xFF;
+    // tls_desc.access = SD_TYPE_DATA_USER;
+    // tls_desc.limit_high = (ptr->limit >> 16) & 0xF;
+    // tls_desc.flags = (ptr->limit_in_pages << 3) | (ptr->seg_32bit << 2);
+    // tls_desc.base_high = (ptr->base_addr >> 24) & 0xFF;
+
+    // return 0;
 }
 
 int thread::load_thread_area() const
 {
-    if (tls_desc.flags == 0)
-        return -1;
-    kernel::user::load_thread_area(tls_desc);
-    return 0;
+    // TODO: LONG MODE
+    // if (tls_desc.flags == 0)
+    //     return -1;
+    // kernel::user::load_thread_area(tls_desc);
+    // return 0;
 }

+ 12 - 12
src/kernel/user/thread_local.cc

@@ -1,5 +1,4 @@
 #include <kernel/process.hpp>
-#include <kernel/mem.h>
 #include <kernel/user/thread_local.hpp>
 
 #include <string.h>
@@ -7,16 +6,17 @@
 
 namespace kernel::user {
 
-void load_thread_area(const segment_descriptor& desc)
-{
-    gdt[6] = desc;
-    asm volatile(
-        "mov %%gs, %%ax\n\t"
-        "mov %%ax, %%gs\n\t"
-        :
-        :
-        : "ax"
-    );
-}
+// TODO: LONG MODE
+// void load_thread_area(const segment_descriptor& desc)
+// {
+//     gdt[6] = desc;
+//     asm volatile(
+//         "mov %%gs, %%ax\n\t"
+//         "mov %%ax, %%gs\n\t"
+//         :
+//         :
+//         : "ax"
+//     );
+// }
 
 } // namespace kernel::user

+ 6 - 8
src/kernel/vfs.cpp

@@ -15,10 +15,8 @@
 
 #include <types/allocator.hpp>
 #include <types/path.hpp>
-#include <types/status.h>
 
 #include <kernel/log.hpp>
-#include <kernel/mem.h>
 #include <kernel/process.hpp>
 #include <kernel/tty.hpp>
 #include <kernel/vfs.hpp>
@@ -55,7 +53,7 @@ int dentry::load()
                 else
                     append(ind, dentry::name_type(name, len));
 
-                return GB_OK;
+                return 0;
             });
 
         if (ret == 0)
@@ -296,7 +294,7 @@ ssize_t fs::regular_file::do_write(const char* __user buf, size_t n)
     return n_wrote;
 }
 
-ssize_t fs::regular_file::seek(off_t n, int whence)
+off_t fs::regular_file::seek(off_t n, int whence)
 {
     if (!S_ISREG(mode))
         return -ESPIPE;
@@ -337,7 +335,7 @@ int fs::regular_file::getdents(char* __user buf, size_t cnt)
 
             size_t reclen = sizeof(fs::user_dirent) + 1 + len;
             if (cnt < reclen)
-                return GB_FAILED;
+                return -EFAULT;
 
             auto* dirp = (fs::user_dirent*)buf;
             dirp->d_ino = ind->ino;
@@ -351,7 +349,7 @@ int fs::regular_file::getdents(char* __user buf, size_t cnt)
 
             buf += reclen;
             cnt -= reclen;
-            return GB_OK;
+            return 0;
         });
 
     if (nread > 0)
@@ -373,7 +371,7 @@ int fs::regular_file::getdents64(char* __user buf, size_t cnt)
 
             size_t reclen = sizeof(fs::user_dirent64) + len;
             if (cnt < reclen)
-                return GB_FAILED;
+                return -EFAULT;
 
             auto* dirp = (fs::user_dirent64*)buf;
             dirp->d_ino = ind->ino;
@@ -386,7 +384,7 @@ int fs::regular_file::getdents64(char* __user buf, size_t cnt)
 
             buf += reclen;
             cnt -= reclen;
-            return GB_OK;
+            return 0;
         });
 
     if (nread > 0)

+ 16 - 14
src/kernel/vfs/tmpfs.cc

@@ -1,10 +1,12 @@
-#include <kernel/vfs.hpp>
-#include <kernel/mm.hpp>
-#include <kernel/log.hpp>
-
 #include <algorithm>
-#include <vector>
 #include <map>
+#include <vector>
+
+#include <stdint.h>
+
+#include <kernel/log.hpp>
+#include <kernel/mm.hpp>
+#include <kernel/vfs.hpp>
 
 using fs::vfs, fs::inode, fs::dentry;
 
@@ -37,9 +39,9 @@ private:
     {
         return static_cast<fdata_t*>(data);
     }
-    static constexpr ptr_t as_val(void* data)
+    static constexpr uintptr_t as_val(void* data)
     {
-        return std::bit_cast<ptr_t>(data);
+        return std::bit_cast<uintptr_t>(data);
     }
     inline void* _getdata(ino_t ino) const
     {
@@ -51,7 +53,7 @@ private:
         inode_data.insert(std::make_pair(ino, data));
         return ino;
     }
-    inline ino_t _savedata(ptr_t data)
+    inline ino_t _savedata(uintptr_t data)
     {
         return _savedata((void*)data);
     }
@@ -93,7 +95,7 @@ protected:
 
             // inode mode filetype is compatible with user dentry filetype
             auto ret = filldir(entry.filename, 0, ind, ind->mode & S_IFMT);
-            if (ret != GB_OK)
+            if (ret != 0)
                 break;
         }
 
@@ -158,7 +160,7 @@ public:
         if (dir->flags.present)
             dir->append(get_inode(file.ino), filename);
 
-        return GB_OK;
+        return 0;
     }
 
     virtual int inode_mknode(dentry* dir, const char* filename, mode_t mode, dev_t dev) override
@@ -175,7 +177,7 @@ public:
         if (dir->flags.present)
             dir->append(get_inode(node.ino), filename);
 
-        return GB_OK;
+        return 0;
     }
 
     virtual int inode_mkdir(dentry* dir, const char* dirname, mode_t mode) override
@@ -192,7 +194,7 @@ public:
         if (dir->flags.present)
             dir->append(new_dir, dirname);
 
-        return GB_OK;
+        return 0;
     }
 
     virtual int symlink(dentry* dir, const char* linkname, const char* target) override
@@ -288,7 +290,7 @@ public:
             st->stx_mask |= STATX_GID;
         }
 
-        return GB_OK;
+        return 0;
     }
 
     virtual int inode_rmfile(dentry* dir, const char* filename) override
@@ -344,7 +346,7 @@ public:
         auto* data = as_fdata(_getdata(file->ino));
         data->resize(size);
         file->size = size;
-        return GB_OK;
+        return 0;
     }
 };
 

+ 188 - 86
src/kinit.cpp

@@ -1,12 +1,10 @@
 #include <asm/port_io.h>
-#include <asm/sys.h>
 
 #include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <sys/utsname.h>
 
-#include <types/status.h>
 #include <types/types.h>
 
 #include <kernel/hw/keyboard.h>
@@ -15,7 +13,8 @@
 #include <kernel/hw/timer.h>
 #include <kernel/interrupt.h>
 #include <kernel/log.hpp>
-#include <kernel/mem.h>
+#include <kernel/mem/paging.hpp>
+#include <kernel/mem/types.hpp>
 #include <kernel/process.hpp>
 #include <kernel/syscall.hpp>
 #include <kernel/task.h>
@@ -27,42 +26,38 @@ typedef void (*constructor)(void);
 extern constructor const SECTION(".rodata.kinit") start_ctors;
 extern constructor const SECTION(".rodata.kinit") end_ctors;
 
-// TODO: LONG MODE
-// extern struct mem_size_info SECTION(".stage1") asm_mem_size_info;
-// extern uint8_t SECTION(".stage1") asm_e820_mem_map[1024];
-// extern uint32_t SECTION(".stage1") asm_e820_mem_map_count;
-// extern uint32_t SECTION(".stage1") asm_e820_mem_map_entry_size;
+struct PACKED bootloader_data {
+    uint32_t meminfo_entry_count;
+    uint32_t meminfo_entry_length;
 
-SECTION(".text.kinit")
-static inline void save_loader_data(void)
-{
-    // memcpy(e820_mem_map, asm_e820_mem_map, sizeof(e820_mem_map));
-    // e820_mem_map_count = asm_e820_mem_map_count;
-    // e820_mem_map_entry_size = asm_e820_mem_map_entry_size;
-    // memcpy(&mem_size_info, &asm_mem_size_info, sizeof(struct mem_size_info));
-}
+    // don't forget to add the initial 1m to the total
+    uint32_t meminfo_1k_blocks;
+    uint32_t meminfo_64k_blocks;
 
-SECTION(".text.kinit")
-static inline void load_new_gdt(void)
-{
-    create_segment_descriptor(gdt + 0, 0, 0, 0, 0);
-    create_segment_descriptor(gdt + 1, 0, ~0, 0b1100, SD_TYPE_CODE_SYSTEM);
-    create_segment_descriptor(gdt + 2, 0, ~0, 0b1100, SD_TYPE_DATA_SYSTEM);
-    create_segment_descriptor(gdt + 3, 0, ~0, 0b1100, SD_TYPE_CODE_USER);
-    create_segment_descriptor(gdt + 4, 0, ~0, 0b1100, SD_TYPE_DATA_USER);
-    create_segment_descriptor(gdt + 5, (std::size_t)&tss, sizeof(tss), 0b0000, SD_TYPE_TSS);
-    create_segment_descriptor(gdt + 6, 0, 0, 0b1100, SD_TYPE_DATA_USER);
-
-    asm_load_gdt((7 * 8 - 1) << 16, (pptr_t)gdt);
-    asm_load_tr((6 - 1) * 8);
-
-    asm_cli();
-}
+    // meminfo entries
+    kernel::mem::e820_mem_map_entry
+        meminfo_entries[(1024-4*4)/24];
+};
+
+extern void init_vfs();
+
+namespace kernel::kinit {
 
 SECTION(".text.kinit")
-static inline void init_bss_section(void)
+static inline void enable_sse()
 {
-    memset(bss_addr, 0x00, bss_len);
+    asm volatile(
+            "mov %%cr0, %%rax\n\t"
+            "and $(~0xc), %%rax\n\t"
+            "or $0x22, %%rax\n\t"
+            "mov %%rax, %%cr0\n\t"
+            "\n\t"
+            "mov %%cr4, %%rax\n\t"
+            "or $0x600, %%rax\n\t"
+            "mov %%rax, %%cr4\n\t"
+            "fninit\n\t"
+            ::: "rax"
+            );
 }
 
 SECTION(".text.kinit")
@@ -71,26 +66,22 @@ static inline int init_console(const char* name)
     if (name[0] == 't' && name[1] == 't' && name[2] == 'y') {
         if (name[3] == 'S' || name[3] == 's') {
             if (name[4] == '0') {
-                console = types::memory::kinew<serial_tty>(PORT_SERIAL0);
-                return GB_OK;
+                console = new serial_tty(PORT_SERIAL0);
+                return 0;
             }
             if (name[4] == '1') {
-                console = types::memory::kinew<serial_tty>(PORT_SERIAL1);
-                return GB_OK;
+                console = new serial_tty(PORT_SERIAL1);
+                return 0;
             }
         }
         if (name[3] == 'V' && name[3] == 'G' && name[3] == 'A') {
-            console = types::memory::kinew<vga_tty>();
-            return GB_OK;
+            console = new vga_tty{};
+            return 0;
         }
     }
-    return GB_FAILED;
+    return -EINVAL;
 }
 
-extern void init_vfs();
-
-namespace kernel::kinit {
-
 SECTION(".text.kinit")
 static void init_uname()
 {
@@ -103,59 +94,28 @@ static void init_uname()
     strcpy(kernel::sys_utsname->domainname, "(none)");
 }
 
-} // namespace kernel::kinit
-
-struct PACKED bootloader_data {
-    uint32_t meminfo_entry_count;
-    uint32_t meminfo_entry_size;
-    std::byte data[1024 - 2*4];
-};
-
-extern "C" SECTION(".text.kinit")
-void NORETURN kernel_init(bootloader_data* data)
+SECTION(".text.kinit")
+void NORETURN real_kernel_init()
 {
-    // enable SSE
-    asm volatile(
-            "mov %%cr0, %%rax\n\t"
-            "and $(~0xc), %%rax\n\t"
-            "or $0x22, %%rax\n\t"
-            "mov %%rax, %%cr0\n\t"
-            "\n\t"
-            "mov %%cr4, %%rax\n\t"
-            "or $0x600, %%rax\n\t"
-            "mov %%rax, %%cr4\n\t"
-            "fninit\n\t"
-            ::: "rax"
-            );
-
-    int a = data->meminfo_entry_count;
-
-    init_bss_section();
-
-    save_loader_data();
-
-    load_new_gdt();
-
-    // call global ctors
-    // NOTE:
-    // the initializer of global objects MUST NOT contain
+    // call global constructors
+    // NOTE: the initializer of global objects MUST NOT contain
     // all kinds of memory allocations
-    for (const constructor* ctor = &start_ctors; ctor != &end_ctors; ++ctor) {
+    for (auto* ctor = &start_ctors; ctor != &end_ctors; ++ctor)
         (*ctor)();
-    }
 
     init_idt();
-    init_mem();
+    // TODO: LONG MODE
+    // init_mem();
     init_pic();
     init_pit();
 
-    kernel::kinit::init_uname();
+    init_uname();
 
     int ret = init_serial_port(PORT_SERIAL0);
-    assert(ret == GB_OK);
+    assert(ret == 0);
 
     ret = init_console("ttyS0");
-    assert(ret == GB_OK);
+    assert(ret == 0);
 
     kernel::kinit::init_pci();
     init_vfs();
@@ -165,3 +125,145 @@ void NORETURN kernel_init(bootloader_data* data)
     kmsg("switching execution to the scheduler...\n");
     init_scheduler();
 }
+
+extern "C" uint64_t BSS_ADDR;
+extern "C" uint64_t BSS_LENGTH;
+
+SECTION(".text.kinit")
+static inline void setup_early_kernel_page_table()
+{
+    using namespace kernel::mem::paging;
+
+    // the temporary mapping in the first top-level entry is no longer needed
+    KERNEL_PAGE_TABLE[0x000].clear();
+
+    // table indices of the virtual address the kernel bss is mapped at
+    constexpr auto bss_idx = idx_all(0xffffffffc0200000ULL);
+
+    auto bss_pdpt = KERNEL_PAGE_TABLE[std::get<1>(bss_idx)].parse();
+    auto bss_pd = bss_pdpt[std::get<2>(bss_idx)].parse();
+
+    // kernel bss, size 2M
+    bss_pd[std::get<3>(bss_idx)].set(
+        PA_P | PA_RW | PA_PS | PA_G | PA_NXE, 0x200000);
+
+    // zero the bss once its mapping is in place
+    memset((void*)BSS_ADDR, 0x00, BSS_LENGTH);
+}
+
+SECTION(".text.kinit")
+static inline void make_early_kernel_stack()
+{
+    using namespace kernel::mem;
+    using namespace kernel::mem::paging;
+
+    // one page for the new paging table, one order-9 block for the stack
+    auto* table_page = alloc_page();
+    auto* stack_block = alloc_pages(9);
+
+    const auto table_pfn = page_to_pfn(table_page);
+    memset(physaddr<char>{table_pfn}, 0x00, 0x1000);
+
+    // table indices of the virtual address of the early kernel stack
+    constexpr auto stack_idx = idx_all(0xffffffc040000000ULL);
+
+    // early kernel stack
+    auto pdpte = KERNEL_PAGE_TABLE[std::get<1>(stack_idx)]
+                     .parse()[std::get<2>(stack_idx)];
+    pdpte.set(PA_P | PA_RW | PA_G | PA_NXE, page_to_pfn(table_page));
+
+    auto stack_pd = pdpte.parse();
+    stack_pd[std::get<3>(stack_idx)].set(
+            PA_P | PA_RW | PA_PS | PA_G | PA_NXE,
+            page_to_pfn(stack_block));
+}
+
+SECTION(".text.kinit")
+static inline void setup_buddy(uintptr_t addr_max)
+{
+    using namespace kernel::mem;
+    using namespace kernel::mem::paging;
+
+    // table indices of the virtual address the page struct array lives at
+    constexpr auto array_idx = idx_all(0xffffff8040000000ULL);
+
+    // number of 2M mappings needed to hold one page struct per page frame
+    addr_max >>= 12;
+    int count = (addr_max * sizeof(page) + 0x200000 - 1) / 0x200000;
+
+    pfn_t start_pfn = 0x400000;
+
+    // the page at 0x105000 becomes the table referenced by the entry below
+    memset(physaddr<void>{0x105000}, 0x00, 4096);
+
+    auto pdpte = KERNEL_PAGE_TABLE[std::get<1>(array_idx)]
+                     .parse()[std::get<2>(array_idx)];
+    pdpte.set(PA_P | PA_RW | PA_G | PA_NXE, 0x105000);
+
+    // map the page struct array; start_pfn ends up just past its backing
+    auto array_pd = pdpte.parse();
+    for (int n = 0; n < count; ++n, start_pfn += 0x200000) {
+        array_pd[std::get<3>(array_idx)+n].set(
+            PA_P | PA_RW | PA_PS | PA_G | PA_NXE, start_pfn);
+    }
+
+    PAGE_ARRAY = (page*)0xffffff8040000000ULL;
+    memset(PAGE_ARRAY, 0x00, addr_max * sizeof(page));
+
+    // feed every free e820 area to the buddy allocator, leaving out
+    // everything below 0x106000 and the range [0x200000, start_pfn)
+    for (int n = 0; n < (int)info::e820_entry_count; ++n) {
+        auto& entry = info::e820_entries[n];
+        if (entry.type != 1) // type == 1: free area
+            continue;
+
+        auto start = entry.base;
+        auto end = start + entry.len;
+
+        // area lies entirely in the low range that is not handed out
+        if (end <= 0x106000)
+            continue;
+
+        if (start < 0x106000)
+            start = 0x106000;
+
+        // area crosses 0x200000: add the part below it, then resume
+        // after the page struct array
+        if (start < 0x200000 && end >= 0x200000) {
+            mem::paging::create_zone(start, 0x200000);
+            start = start_pfn;
+        }
+
+        if (start > end)
+            continue;
+
+        mem::paging::create_zone(start, end);
+    }
+}
+
+SECTION(".text.kinit")
+static inline void save_memory_info(bootloader_data* data)
+{
+    // total size = initial 1M + the 1k blocks + the 64k blocks reported by
+    // the bootloader
+    const unsigned long long initial_1m = 1ULL * 1024ULL * 1024ULL;
+    const unsigned long long from_1k_blocks =
+        1024ULL * data->meminfo_1k_blocks;
+    const unsigned long long from_64k_blocks =
+        64ULL * 1024ULL * data->meminfo_64k_blocks;
+
+    kernel::mem::info::memory_size =
+        initial_1m + from_1k_blocks + from_64k_blocks;
+
+    kernel::mem::info::e820_entry_count = data->meminfo_entry_count;
+    kernel::mem::info::e820_entry_length = data->meminfo_entry_length;
+
+    // keep a copy of the e820 map handed over by the bootloader
+    memcpy(kernel::mem::info::e820_entries, data->meminfo_entries,
+        sizeof(kernel::mem::info::e820_entries));
+}
+
+extern "C" SECTION(".text.kinit")
+void NORETURN kernel_init(bootloader_data* data)
+{
+    enable_sse();
+
+    setup_early_kernel_page_table();
+    save_memory_info(data);
+
+    // create struct pages: the array has to cover everything up to the
+    // highest address of any free (type 1) e820 entry
+    uintptr_t addr_max = 0;
+    for (int n = 0; n < (int)kernel::mem::info::e820_entry_count; ++n) {
+        auto& entry = kernel::mem::info::e820_entries[n];
+        if (entry.type == 1)
+            addr_max = std::max(addr_max, entry.base + entry.len);
+    }
+
+    setup_buddy(addr_max);
+    make_early_kernel_stack();
+
+    // switch to the early kernel stack and continue in real_kernel_init
+    asm volatile(
+            "mov $0xffffffc040200000, %%rsp\n\t"
+            "xor %%rbp, %%rbp\n\t"
+            "call *%0\n\t"
+            :
+            : "r"(real_kernel_init)
+            :
+            );
+    die();
+}
+
+} // namespace kernel::kinit

+ 2 - 2
src/mbr.S

@@ -83,9 +83,9 @@ _loop_read_kernel:
 
 _get_memory_size_use_ax:
     sub $1024, %esp
-    movzwl %ax, %eax
+    movzw %ax, %eax
     mov %eax, 8(%esp)  # 1k blocks
-    movzwl %bx, %ebx
+    movzw %bx, %ebx
     mov %ebx, 12(%esp) # 64k blocks
 
     # save the destination address to es:di

+ 8 - 9
src/types/elf.cpp

@@ -9,7 +9,6 @@
 
 #include <types/elf.hpp>
 
-#include <kernel/mem.h>
 #include <kernel/process.hpp>
 #include <kernel/vfs.hpp>
 
@@ -38,7 +37,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
     auto* ent_exec = d->exec_dent;
     if (!ent_exec) {
         d->errcode = ENOENT;
-        return GB_FAILED;
+        return -1;
     }
 
     // TODO: detect file format
@@ -51,7 +50,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
 
     if (n_read != sizeof(types::elf::elf32_header)) {
         d->errcode = EINVAL;
-        return GB_FAILED;
+        return -1;
     }
 
     size_t phents_size = hdr.phentsize * hdr.phnum;
@@ -66,7 +65,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
     // broken file or I/O error
     if (n_read != phents_size) {
         d->errcode = EINVAL;
-        return GB_FAILED;
+        return -1;
     }
 
     std::vector<types::elf::elf32_section_header_entry> shents(hdr.shnum);
@@ -79,7 +78,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
     // broken file or I/O error
     if (n_read != shents_size) {
         d->errcode = EINVAL;
-        return GB_FAILED;
+        return -1;
     }
 
     // copy argv and envp
@@ -113,7 +112,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
                 1,
                 d->system);
 
-            if (ret != GB_OK)
+            if (ret != 0)
                 kill_current(SIGSEGV);
         }
 
@@ -121,7 +120,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
             auto ret = mmap((char*)vaddr + flen, vlen - flen,
                 nullptr, 0, true, d->system);
 
-            if (ret != GB_OK)
+            if (ret != 0)
                 kill_current(SIGSEGV);
         }
 
@@ -141,7 +140,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
         types::elf::ELF_STACK_SIZE, nullptr, 0, true, false);
 
     // TODO: destruct local variables before calling kill_current
-    if (ret != GB_OK)
+    if (ret != 0)
         kill_current(SIGSEGV);
 
     d->eip = (void*)hdr.entry;
@@ -184,5 +183,5 @@ int types::elf::elf32_load(types::elf::elf32_load_data* d)
     // rename current thread
     current_thread->name = ent_exec->name;
 
-    return GB_OK;
+    return 0;
 }