
change(x86): optimize bootstrap code, remove kinit.cpp

some changes are made to the memory layout to make the
initialization smoother.

fixes the issue in `TaskContext` where `call[0-5]` might cause
function stack frames to be unaligned.
greatbridf 8 months ago
parent
commit
ebd3d1224c

+ 1 - 5
CMakeLists.txt

@@ -37,8 +37,7 @@ set(BOOTLOADER_SOURCES src/boot.s
                        src/mbr.S
                        )
 
-set(KERNEL_MAIN_SOURCES src/kinit.cpp
-                        src/kernel/async/lock.cc
+set(KERNEL_MAIN_SOURCES src/kernel/async/lock.cc
                         src/kernel/allocator.cc
                         src/kernel/mem/slab.cc
                         src/kernel/hw/acpi.cc
@@ -48,15 +47,12 @@ set(KERNEL_MAIN_SOURCES src/kinit.cpp
                         include/kernel/async/lock.hpp
                         include/kernel/mem/paging.hpp
                         include/kernel/mem/slab.hpp
-                        include/kernel/mem/types.hpp
-                        include/kernel/utsname.hpp
                         include/kernel/hw/acpi.hpp
                         include/kernel/hw/pci.hpp
                         include/kernel/hw/port.hpp
                         include/types/list.hpp
                         include/types/types.h
                         include/types/allocator.hpp
-                        include/kernel/log.hpp
                         )
 
 add_executable(kernel.out ${KERNEL_MAIN_SOURCES} ${BOOTLOADER_SOURCES})

+ 2 - 1
arch/src/x86_64/context.rs

@@ -142,7 +142,8 @@ impl TaskContext {
             "mov %r15, %rcx",
             "mov %rbx, %r8",
             "mov %rbp, %rax",
-            "xor %rbp, %rbp",
+            "xor %rbp, %rbp", // NULL previous stack frame
+            "push %rbp",      // NULL return address.
             "jmp *%rax",
             options(att_syntax),
         );
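
Why the extra `push` matters: the SysV x86-64 ABI guarantees `%rsp ≡ 8 (mod 16)` at function entry, because a `call` pushes an 8-byte return address onto a 16-byte-aligned stack. Entering the entry function with a bare `jmp` skips that push, leaving every frame below off by 8, which breaks aligned SSE spills. A minimal sketch of the pattern (the helper name is made up for illustration):

```rust
// Hedged sketch, not code from the tree: entering a function via `jmp`
// requires faking the return-address push by hand.
core::arch::global_asm!(
    r#"
    enter_fn_via_jmp:
        xor %rbp, %rbp   # NULL frame pointer terminates backtraces
        push %rbp        # fake NULL return address; restores the ABI rule
                         # that (%rsp + 8) is 16-byte aligned at entry
        jmp *%rax        # enter the function held in %rax
    "#,
    options(att_syntax),
);
```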

+ 15 - 22
arch/src/x86_64/init.rs

@@ -104,16 +104,13 @@ impl CPU {
 #[macro_export]
 macro_rules! define_smp_bootstrap {
     ($cpu_count:literal, $ap_entry:ident, $alloc_kstack:tt) => {
-        #[no_mangle]
         static BOOT_SEMAPHORE: core::sync::atomic::AtomicU64 =
             core::sync::atomic::AtomicU64::new(0);
-        #[no_mangle]
         static BOOT_STACK: core::sync::atomic::AtomicU64 =
             core::sync::atomic::AtomicU64::new(0);
 
-        #[no_mangle]
         static CPU_COUNT: core::sync::atomic::AtomicU64 =
-            core::sync::atomic::AtomicU64::new(0);
+            core::sync::atomic::AtomicU64::new(1);
 
         core::arch::global_asm!(
             r#"
@@ -122,11 +119,11 @@ macro_rules! define_smp_bootstrap {
         .globl ap_bootstrap
         .type ap_bootstrap, @function
         ap_bootstrap:
-            ljmp $0x0, $.Lap1
+            ljmp $0x0, $2f
 
-        .Lap1:
+        2:
             # we use the shared gdt for cpu bootstrapping
-            lgdt .Lshared_gdt_desc
+            lgdt EARLY_GDT_DESCRIPTOR
 
             # set msr
             mov $0xc0000080, %ecx
@@ -148,14 +145,10 @@ macro_rules! define_smp_bootstrap {
             or $0x80010001, %eax
             mov %eax, %cr0
 
-            ljmp $0x08, $.Lap_bootstrap_end
-
-        .align 16
-        .Lshared_gdt_desc:
-            .8byte 0x0000000000005f
+            ljmp $0x08, $2f
 
         .code64
-        .Lap_bootstrap_end:
+        2:
             mov $0x10, %ax
             mov %ax, %ds
             mov %ax, %es
@@ -164,21 +157,21 @@ macro_rules! define_smp_bootstrap {
             xor %rsp, %rsp
             xor %rax, %rax
             inc %rax
-        1:
+        2:
             xchg %rax, {BOOT_SEMAPHORE}
             cmp $0, %rax
-            je 1f
+            je 2f
             pause
-            jmp 1b
+            jmp 2b
 
-        1:
+        2:
             mov {BOOT_STACK}, %rsp # Acquire
             cmp $0, %rsp
-            jne 1f
+            jne 2f
             pause
-            jmp 1b
+            jmp 2b
 
-        1:
+        2:
             xor %rax, %rax
             mov %rax, {BOOT_STACK} # Release
             xchg %rax, {BOOT_SEMAPHORE}
@@ -190,7 +183,7 @@ macro_rules! define_smp_bootstrap {
             jmp {AP_ENTRY}
             .popsection
             "#,
-            KERNEL_PML4 = const 0x2000,
+            KERNEL_PML4 = const 0x1000,
             BOOT_SEMAPHORE = sym BOOT_SEMAPHORE,
             BOOT_STACK = sym BOOT_STACK,
             CPU_COUNT = sym CPU_COUNT,
@@ -200,7 +193,7 @@ macro_rules! define_smp_bootstrap {
 
         pub unsafe fn wait_cpus_online() {
             use core::sync::atomic::Ordering;
-            while CPU_COUNT.load(Ordering::Acquire) != $cpu_count - 1 {
+            while CPU_COUNT.load(Ordering::Acquire) != $cpu_count {
                 if BOOT_STACK.load(Ordering::Acquire) == 0 {
                     let stack_bottom = $alloc_kstack as u64;
                     BOOT_STACK.store(stack_bottom, Ordering::Release);
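
Note the semantics change: `CPU_COUNT` now starts at 1 so the BSP counts itself, letting `wait_cpus_online` compare against the full `$cpu_count` rather than `$cpu_count - 1`. A hedged sketch of how the macro is meant to be instantiated (`ap_entry` and `alloc_ap_stack` are names made up for this example):

```rust
// Hypothetical per-AP stack source; must evaluate to something castable
// to u64 (the trampoline publishes it through BOOT_STACK).
fn alloc_ap_stack() -> usize {
    unimplemented!("hand out the top of a freshly allocated kernel stack")
}

define_smp_bootstrap!(4, ap_entry, { alloc_ap_stack() });

unsafe extern "C" fn ap_entry() -> ! {
    // Each AP lands here on its own stack; the trampoline has already
    // bumped CPU_COUNT.
    loop {}
}

fn smp_init() {
    unsafe {
        // Returns once CPU_COUNT reaches 4: the BSP (pre-counted, since the
        // counter now starts at 1) plus three APs.
        wait_cpus_online();
    }
}
```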

+ 1 - 1
arch/src/x86_64/interrupt.rs

@@ -482,7 +482,7 @@ impl InterruptControl {
             pause();
         }
 
-        icr.write(0xc4601);
+        icr.write(0xc4607);
         while icr.read() & 0x1000 != 0 {
             pause();
         }
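
The new value tracks the relocation of the AP trampoline: in a startup IPI the low byte is the vector, and the AP begins executing at `vector << 12`. With stage1 moved to physical `0x7000` (see `paging_asm.h` below), the vector becomes `0x07`. A sketch of the encoding, with illustrative constant names:

```rust
// Hedged decode of the xAPIC ICR value written above (field layout per the
// Intel SDM; the constant names are made up).
const ICR_DELIVERY_STARTUP: u64 = 0b110 << 8; // delivery mode 110 = SIPI
const ICR_LEVEL_ASSERT: u64 = 1 << 14;
const ICR_DEST_ALL_BUT_SELF: u64 = 0b11 << 18; // destination shorthand

const fn sipi_icr(trampoline_paddr: u64) -> u64 {
    // The AP starts at vector << 12, so the trampoline must be 4K-aligned
    // and below 1M.
    ICR_DEST_ALL_BUT_SELF | ICR_LEVEL_ASSERT | ICR_DELIVERY_STARTUP | (trampoline_paddr >> 12)
}

// 0xc0000 | 0x4000 | 0x600 | 0x7 == 0xc4607, the value written above.
const _: () = assert!(sipi_icr(0x7000) == 0xc4607);
```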

+ 26 - 17
arch/src/x86_64/mm.rs

@@ -9,7 +9,7 @@ use eonix_mm::{
 
 pub const PAGE_SIZE: usize = 0x1000;
 
-const KERNEL_PML4_PFN: PFN = PFN::from_val(0x2000 >> 12);
+const KERNEL_PML4_PFN: PFN = PFN::from_val(0x1000 >> 12);
 
 const PA_P: u64 = 0x001;
 const PA_RW: u64 = 0x002;
@@ -156,26 +156,16 @@ impl RawAttribute for PageAttribute64 {
             page_attr |= PageAttribute::ANONYMOUS;
         }
 
-        Some(page_attr)
-    }
-
-    fn from_table_attr(table_attr: TableAttribute) -> Self {
-        let mut raw_attr = PA_RW;
-
-        for attr in table_attr.iter() {
-            match attr {
-                TableAttribute::PRESENT => raw_attr |= PA_P,
-                TableAttribute::GLOBAL => raw_attr |= PA_G,
-                TableAttribute::USER => raw_attr |= PA_US,
-                TableAttribute::ACCESSED => raw_attr |= PA_A,
-                _ => unreachable!("Invalid table attribute"),
-            }
+        if self.0 & PA_PS != 0 {
+            page_attr |= PageAttribute::HUGE;
         }
 
-        Self(raw_attr)
+        Some(page_attr)
     }
+}
 
-    fn from_page_attr(page_attr: PageAttribute) -> Self {
+impl From<PageAttribute> for PageAttribute64 {
+    fn from(page_attr: PageAttribute) -> Self {
         let mut raw_attr = PA_NXE;
 
         for attr in page_attr.iter() {
@@ -191,6 +181,7 @@ impl RawAttribute for PageAttribute64 {
                 PageAttribute::COPY_ON_WRITE => raw_attr |= PA_COW,
                 PageAttribute::MAPPED => raw_attr |= PA_MMAP,
                 PageAttribute::ANONYMOUS => raw_attr |= PA_ANON,
+                PageAttribute::HUGE => raw_attr |= PA_PS,
                 _ => unreachable!("Invalid page attribute"),
             }
         }
@@ -199,4 +190,22 @@ impl RawAttribute for PageAttribute64 {
     }
 }
 
+impl From<TableAttribute> for PageAttribute64 {
+    fn from(table_attr: TableAttribute) -> Self {
+        let mut raw_attr = PA_RW;
+
+        for attr in table_attr.iter() {
+            match attr {
+                TableAttribute::PRESENT => raw_attr |= PA_P,
+                TableAttribute::GLOBAL => raw_attr |= PA_G,
+                TableAttribute::USER => raw_attr |= PA_US,
+                TableAttribute::ACCESSED => raw_attr |= PA_A,
+                _ => unreachable!("Invalid table attribute"),
+            }
+        }
+
+        Self(raw_attr)
+    }
+}
+
 pub type DefaultPagingMode = PagingMode4Levels;
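
With the conversions expressed as `From` impls, call sites can use `.into()` instead of naming `from_page_attr`/`from_table_attr`. A sketch of the resulting call-site shape, as it would look inside this file:

```rust
use eonix_mm::page_table::{PageAttribute, TableAttribute};

// Illustrative uses of the conversions defined above.
fn build_attrs() -> (PageAttribute64, PageAttribute64) {
    let page_attr = PageAttribute::PRESENT | PageAttribute::WRITE | PageAttribute::GLOBAL;
    let table_attr = TableAttribute::PRESENT | TableAttribute::USER;

    // Previously `PageAttribute64::from_page_attr(..)` / `from_table_attr(..)`.
    (page_attr.into(), table_attr.into())
}
```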

+ 9 - 5
crates/eonix_mm/src/page_table/page_table.rs

@@ -42,6 +42,13 @@ where
     A: PageAlloc,
     X: PageAccess,
 {
+    pub fn with_root_table(root_table_page: Page<A>) -> Self {
+        Self {
+            root_table_page,
+            phantom: PhantomData,
+        }
+    }
+
     pub fn new_in<A1: PageAlloc>(kernel_root_table_page: &Page<A1>, alloc: A) -> Self {
         let new_root_table_page = Page::alloc_in(alloc);
         let new_table_data = X::get_ptr_for_page(&new_root_table_page);
@@ -65,10 +72,7 @@ where
             root_page_table.index_mut(idx).take();
         }
 
-        Self {
-            root_table_page: new_root_table_page,
-            phantom: PhantomData,
-        }
+        Self::with_root_table(new_root_table_page)
     }
 
     pub fn addr(&self) -> PAddr {
@@ -87,7 +91,7 @@ where
     }
 
     pub fn iter_kernel(&self, range: VRange) -> impl Iterator<Item = &mut M::Entry> {
-        Self::iter_kernel_levels(self, range, M::LEVELS)
+        self.iter_kernel_levels(range, M::LEVELS)
     }
 
     /// Iterates over the kernel space entries in the page table for the specified levels.
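
The new `with_root_table` constructor is what later allows wrapping an already-built root table (the early boot PML4) without allocating; `new_in` is now expressed in terms of it. A hedged sketch of that use, mirroring the `GLOBAL_PAGE_TABLE` initializer added in `src/kernel_init.rs` further down:

```rust
use arch::DefaultPagingMode;
use crate::kernel::mem::KernelPageAccess; // import path as in `src/kernel_init.rs`
use eonix_mm::page_table::{PageTable, PagingMode as _};
use eonix_mm::paging::{NoAlloc, Page};

fn wrap_kernel_root_table() -> PageTable<DefaultPagingMode, NoAlloc, KernelPageAccess> {
    unsafe {
        // SAFETY: the PFN refers to the statically reserved, live kernel PML4.
        Page::<NoAlloc>::with_raw(DefaultPagingMode::KERNEL_ROOT_TABLE_PFN, |root| {
            PageTable::with_root_table(root.clone())
        })
    }
}
```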

+ 2 - 13
crates/eonix_mm/src/page_table/pte.rs

@@ -23,10 +23,11 @@ bitflags! {
         const COPY_ON_WRITE = 256;
         const MAPPED = 512;
         const ANONYMOUS = 1024;
+        const HUGE = 2048;
     }
 }
 
-pub trait RawAttribute: Copy {
+pub trait RawAttribute: Copy + From<PageAttribute> + From<TableAttribute> {
     /// Create a new attribute representing a non-present page.
     fn null() -> Self;
 
@@ -43,18 +44,6 @@ pub trait RawAttribute: Copy {
     /// # Panic
     /// The implementor should panic if invalid combinations of flags are present.
     fn as_page_attr(self) -> Option<PageAttribute>;
-
-    /// Convert the attribute to a raw value.
-    ///
-    /// # Panic
-    /// The implementor should panic if invalid combinations of flags are present.
-    fn from_table_attr(table_attr: TableAttribute) -> Self;
-
-    /// Convert the attribute to a raw value.
-    ///
-    /// # Panic
-    /// The implementor should panic if invalid combinations of flags are present.
-    fn from_page_attr(page_attr: PageAttribute) -> Self;
 }
 
 pub trait PTE: Sized {
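
Promoting the conversions to supertrait bounds keeps generic code over `PTE` ergonomic: wherever an `Attr` is needed, a `PageAttribute` or `TableAttribute` converts with `.into()`. A small hypothetical helper under the new bound:

```rust
use eonix_mm::page_table::{PageAttribute, PTE};
use eonix_mm::paging::PFN;

// `E::Attr: From<PageAttribute>` is implied by `RawAttribute` after this change.
fn map_present<E: PTE>(pte: &mut E, pfn: PFN) {
    let attr = PageAttribute::PRESENT | PageAttribute::READ;
    pte.set(pfn, attr.into());
}
```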

+ 1 - 1
crates/eonix_mm/src/page_table/pte_iterator.rs

@@ -41,7 +41,7 @@ pub trait IteratorType<M: PagingMode> {
 
             pte.set(
                 page.into_raw(),
-                <M::Entry as PTE>::Attr::from_table_attr(Self::page_table_attributes()),
+                <M::Entry as PTE>::Attr::from(Self::page_table_attributes()),
             );
 
             unsafe {

+ 1 - 1
crates/eonix_mm/src/paging.rs

@@ -4,6 +4,6 @@ mod pfn;
 mod raw_page;
 
 pub use page::{Page, PageAccess, PageBlock, PAGE_SIZE, PAGE_SIZE_BITS};
-pub use page_alloc::{GlobalPageAlloc, PageAlloc};
+pub use page_alloc::{GlobalPageAlloc, NoAlloc, PageAlloc};
 pub use pfn::PFN;
 pub use raw_page::RawPage;

+ 2 - 2
crates/eonix_mm/src/paging/page.rs

@@ -99,7 +99,7 @@ where
     where
         F: FnOnce(&Self) -> O,
     {
-        unsafe { Self::with_raw_in(pfn, func, A::global()) }
+        unsafe { Self::with_raw_in(pfn, A::global(), func) }
     }
 
     /// Do some work with the page without touching the reference count with the same
@@ -187,7 +187,7 @@ where
     ///
     /// # Safety
     /// Check `from_raw_in()` for the safety requirements.
-    pub unsafe fn with_raw_in<F, O>(pfn: PFN, func: F, alloc: A) -> O
+    pub unsafe fn with_raw_in<F, O>(pfn: PFN, alloc: A, func: F) -> O
     where
         F: FnOnce(&Self) -> O,
     {

+ 26 - 1
crates/eonix_mm/src/paging/page_alloc.rs

@@ -1,4 +1,4 @@
-use super::RawPage;
+use super::{raw_page::UnmanagedRawPage, RawPage};
 
 /// A trait for allocating and deallocating pages of memory.
 ///
@@ -42,6 +42,9 @@ pub trait GlobalPageAlloc: PageAlloc + 'static {
     fn global() -> Self;
 }
 
+#[derive(Clone)]
+pub struct NoAlloc;
+
 impl<'a, A> PageAlloc for &'a A
 where
     A: PageAlloc,
@@ -60,3 +63,25 @@ where
         (*self).has_management_over(raw_page)
     }
 }
+
+impl PageAlloc for NoAlloc {
+    type RawPage = UnmanagedRawPage;
+
+    fn alloc_order(&self, _: u32) -> Option<Self::RawPage> {
+        panic!("`NoAlloc` cannot allocate pages");
+    }
+
+    unsafe fn dealloc(&self, _: Self::RawPage) {
+        panic!("`NoAlloc` cannot free pages");
+    }
+
+    fn has_management_over(&self, _: Self::RawPage) -> bool {
+        true
+    }
+}
+
+impl GlobalPageAlloc for NoAlloc {
+    fn global() -> Self {
+        Self
+    }
+}
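
`NoAlloc` backs `Page`s over memory that the allocator must never manage: allocating or freeing through it panics, while `has_management_over` accepts any page so borrow-style APIs keep working. The kernel aliases this as `PageUnmanaged` (see `src/kernel/mem/paging.rs` below); a usage sketch:

```rust
use eonix_mm::paging::{NoAlloc, Page, PFN};

fn peek_unmanaged(pfn: PFN) {
    unsafe {
        // SAFETY: the caller guarantees `pfn` refers to valid, live memory.
        // The refcount bump lands on the shared dummy counter inside
        // `UnmanagedRawPage`, so nothing is ever freed.
        Page::<NoAlloc>::with_raw(pfn, |page| {
            let _ = page; // read-only inspection goes here
        });
    }
}
```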

+ 40 - 0
crates/eonix_mm/src/paging/raw_page.rs

@@ -9,3 +9,43 @@ pub trait RawPage: Clone + Copy + From<PFN> + Into<PFN> {
 
     fn is_present(&self) -> bool;
 }
+
+#[derive(Clone, Copy)]
+pub struct UnmanagedRawPage(PFN);
+
+/// Unmanaged raw pages always keep a non-zero refcount so that
+/// `free()` is never called on them.
+static UNMANAGED_RAW_PAGE_CLONE_COUNT: AtomicUsize = AtomicUsize::new(1);
+
+impl UnmanagedRawPage {
+    pub const fn new(pfn: PFN) -> Self {
+        Self(pfn)
+    }
+}
+
+impl From<PFN> for UnmanagedRawPage {
+    fn from(value: PFN) -> Self {
+        Self::new(value)
+    }
+}
+
+impl Into<PFN> for UnmanagedRawPage {
+    fn into(self) -> PFN {
+        let Self(pfn) = self;
+        pfn
+    }
+}
+
+impl RawPage for UnmanagedRawPage {
+    fn order(&self) -> u32 {
+        0
+    }
+
+    fn refcount(&self) -> &AtomicUsize {
+        &UNMANAGED_RAW_PAGE_CLONE_COUNT
+    }
+
+    fn is_present(&self) -> bool {
+        true
+    }
+}
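
Since every `UnmanagedRawPage` shares the single static counter and it starts at 1, balanced clone/drop pairs can never drive it to zero, which keeps `Page<NoAlloc>`'s drop path away from `dealloc`. The property, sketched as it could be checked from inside this module:

```rust
use core::sync::atomic::Ordering;

// Illustrative property check of the shared-refcount trick.
fn refcount_has_a_floor(page: UnmanagedRawPage) {
    let rc = page.refcount();
    let before = rc.load(Ordering::Relaxed);
    rc.fetch_add(1, Ordering::Relaxed); // what cloning a `Page` would do
    rc.fetch_sub(1, Ordering::Relaxed); // what dropping that clone would do
    assert!(rc.load(Ordering::Relaxed) == before && before >= 1);
}
```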

+ 8 - 8
doc/mem_layout.txt

@@ -1,13 +1,13 @@
 physical memory
 
-0x0000 - 0x1000 : GDT for kernel initialization use and some early kernel data
-0x1000 - 0x2000 : kernel stage1
-0x2000 - 0x3000 : kernel space PML4
-0x3000 - 0x4000 : kernel PDPT for physical memory mappings
-0x4000 - 0x5000 : kernel PDPT for kernel space
-0x5000 - 0x6000 : kernel PD for kernel image
-0x6000 - 0x7000 : kernel PT for kernel image
-0x7000 - 0x8000 : kernel PD for struct page array#1
+0x0000 - 0x1000 : Some early kernel data
+0x1000 - 0x2000 : kernel space PML4
+0x2000 - 0x3000 : kernel PDPT for physical memory mappings
+0x3000 - 0x4000 : kernel PDPT for kernel space
+0x4000 - 0x5000 : kernel PD for kernel image
+0x5000 - 0x6000 : kernel PT for kernel image
+0x6000 - 0x7000 : kernel PD for struct page array#1
+0x7000 - 0x8000 : kernel stage1
 
 0x100000 - 0x200000 : unused
 0x200000 - 0x400000 : first kernel bss page (2MB)

+ 0 - 10
include/kernel/log.hpp

@@ -1,10 +0,0 @@
-#pragma once
-
-#define kmsgf(fmt, ...)
-#define kmsg(msg)
-
-#ifdef NDEBUG
-#define kmsgf_debug(...)
-#else
-#define kmsgf_debug(...) kmsgf(__VA_ARGS__)
-#endif

+ 0 - 11
include/kernel/mem/paging.hpp

@@ -83,17 +83,6 @@ constexpr unsigned long PAGE_PRESENT = 0x00010000;
 constexpr unsigned long PAGE_BUDDY = 0x00020000;
 constexpr unsigned long PAGE_SLAB = 0x00040000;
 
-struct page {
-    // TODO: use atomic
-    unsigned long refcount;
-    unsigned long flags;
-
-    page* next;
-    page* prev;
-};
-
-inline page* PAGE_ARRAY;
-
 constexpr unsigned long PAGE_FAULT_P = 0x00000001;
 constexpr unsigned long PAGE_FAULT_W = 0x00000002;
 constexpr unsigned long PAGE_FAULT_U = 0x00000004;

+ 20 - 21
include/kernel/mem/paging_asm.h

@@ -1,26 +1,25 @@
-#define KERNEL_IMAGE_PADDR         0x400000
-#define KERNEL_STAGE1_PADDR        0x001000
-#define KERNEL_PML4                0x002000
-#define KERNEL_PDPT_PHYS_MAPPING   0x003000
-#define KERNEL_PDPT_KERNEL_SPACE   0x004000
-#define KERNEL_PD_KIMAGE           0x005000
-#define KERNEL_PT_KIMAGE           0x006000
-#define KERNEL_PD_STRUCT_PAGE_ARR  0x007000
+#define KERNEL_IMAGE_PADDR 0x400000
+#define KERNEL_PML4 0x001000
+#define KERNEL_PDPT_PHYS_MAPPING 0x002000
+#define KERNEL_PDPT_KERNEL_SPACE 0x003000
+#define KERNEL_PD_KIMAGE 0x004000
+#define KERNEL_PT_KIMAGE 0x005000
+#define KERNEL_PD_STRUCT_PAGE_ARR 0x006000
+#define KERNEL_STAGE1_PADDR 0x007000
 
-#define KERNEL_BSS_HUGE_PAGE       0x200000
+#define KERNEL_BSS_HUGE_PAGE 0x200000
 
-
-#define PA_P    0x0000000000000001
-#define PA_RW   0x0000000000000002
-#define PA_US   0x0000000000000004
-#define PA_PWT  0x0000000000000008
-#define PA_PCD  0x0000000000000010
-#define PA_A    0x0000000000000020
-#define PA_D    0x0000000000000040
-#define PA_PS   0x0000000000000080
-#define PA_G    0x0000000000000100
-#define PA_COW  0x0000000000000200
+#define PA_P 0x0000000000000001
+#define PA_RW 0x0000000000000002
+#define PA_US 0x0000000000000004
+#define PA_PWT 0x0000000000000008
+#define PA_PCD 0x0000000000000010
+#define PA_A 0x0000000000000020
+#define PA_D 0x0000000000000040
+#define PA_PS 0x0000000000000080
+#define PA_G 0x0000000000000100
+#define PA_COW 0x0000000000000200
 #define PA_MMAP 0x0000000000000400
 #define PA_ANON 0x0000000000000800
-#define PA_NXE  0x8000000000000000
+#define PA_NXE 0x8000000000000000
 #define PA_MASK 0xfff0000000000fff

+ 0 - 2
include/kernel/mem/phys.hpp

@@ -7,8 +7,6 @@
 
 #include <types/types.h>
 
-#include <kernel/mem/types.hpp>
-
 namespace kernel::mem {
 
 template <typename T, bool Cached = true>

+ 0 - 36
include/kernel/mem/types.hpp

@@ -1,36 +0,0 @@
-#pragma once
-
-#include <cstddef>
-
-#include <stdint.h>
-
-namespace kernel::mem {
-
-struct gdt_entry {
-    uint64_t limit_low : 16;
-    uint64_t base_low : 16;
-    uint64_t base_mid : 8;
-    uint64_t access : 8;
-    uint64_t limit_high : 4;
-    uint64_t flags : 4;
-    uint64_t base_high : 8;
-};
-
-struct e820_mem_map_entry {
-    uint64_t base;
-    uint64_t len;
-    uint32_t type;
-
-    // might not be valid
-    uint32_t acpi_extension_attr;
-};
-
-namespace info {
-    inline std::size_t memory_size;
-    inline std::size_t e820_entry_count;
-    inline std::size_t e820_entry_length;
-    inline e820_mem_map_entry e820_entries[(1024 - 16) / 24];
-
-} // namespace info
-
-} // namespace kernel::mem

+ 0 - 9
include/kernel/utsname.hpp

@@ -1,9 +0,0 @@
-#pragma once
-
-#include <sys/utsname.h>
-
-namespace kernel {
-
-inline new_utsname* sys_utsname;
-
-} // namespace kernel

+ 107 - 91
src/boot.s

@@ -4,46 +4,39 @@
 
 .code16
 
-.align 4
-.Lbios_idt_desc:
-    .word 0x03ff     # size
-    .long 0x00000000 # base
-
-.align 4
-.Lnull_idt_desc:
-    .word 0 # size
-    .long 0 # base
-
-.Lhalt16:
+.Lhalt:
     hlt
-    jmp .Lhalt16
+    jmp .
 
 # scratch %eax
 # return address should be of 2 bytes, and will be zero extended to 4 bytes
 go_32bit:
     cli
-    lidt .Lnull_idt_desc
+    # borrow the null entry from the early gdt
+    lidt EARLY_GDT
 
     # set PE bit
     mov %cr0, %eax
     or $1, %eax
     mov %eax, %cr0
 
-    ljmp $0x08, $.Lgo_32bit0
+    ljmp $0x18, $.Lgo_32bit0
 
 .Lgo_16bit0:
-    mov $0x20, %ax
+    mov $0x30, %ax
     mov %ax, %ds
+    mov %ax, %es
     mov %ax, %ss
 
-    lidt .Lbios_idt_desc
+    lidt BIOS_IDT_DESCRIPTOR
 
     mov %cr0, %eax
     and $0xfffffffe, %eax
     mov %eax, %cr0
 
-    ljmp $0x00, $.Lgo_16bit1
-.Lgo_16bit1:
+    ljmp $0x00, $2f
+
+2:
     xor %ax, %ax
     mov %ax, %ds
     mov %ax, %ss
@@ -60,10 +53,10 @@ go_32bit:
 # return address should be of 4 bytes, and extra 2 bytes will be popped from the stack
 go_16bit:
     cli
-    ljmp $0x18, $.Lgo_16bit0
+    ljmp $0x28, $.Lgo_16bit0
 
 .Lgo_32bit0:
-    mov $0x10, %ax
+    mov $0x20, %ax
     mov %ax, %ds
     mov %ax, %es
     mov %ax, %ss
@@ -75,7 +68,7 @@ go_16bit:
 
 # build read disk packet on the stack and perform read operation
 #
-# read 32k to 0x2000 and then copy to destination
+# read 16k to 0x8000 and then copy to destination
 #
 # %edi: lba start
 # %esi: destination
@@ -86,10 +79,10 @@ read_disk:
 
     lea -24(%esp), %esp
 
-    mov $0x00400010, %eax # packet size 0, sector count 64
+    mov $0x00200010, %eax # packet size 16, sector count 32
     mov %eax, (%esp)
 
-    mov $0x02000000, %eax # destination address 0x0200:0x0000
+    mov $0x08000000, %eax # destination address 0x0800:0x0000
     mov %eax, 4(%esp)
 
     mov %edi, 8(%esp)  # lba low 4bytes
@@ -105,19 +98,28 @@ read_disk:
     mov $0x42, %ah
     mov $0x80, %dl
     int $0x13
-    jc .Lhalt16
+    jc .Lhalt
 
     call go_32bit
 .code32
     # move data to destination
-    mov $0x2000, %esi
-    mov $8192, %ecx
+    mov $0x8000, %esi
+    mov $4096, %ecx
     rep movsl
 
     mov %ebp, %esp
     pop %ebp
     ret
 
+.align 8
+.Lgdt_data:
+    .8byte 0x00209a0000000000 # 64bit code selector
+    .8byte 0x0000920000000000 # 64bit data selector
+    .8byte 0x00cf9a000000ffff # 32bit code selector
+    .8byte 0x00cf92000000ffff # 32bit data selector
+    .8byte 0x000f9a000000ffff # 16bit code selector
+    .8byte 0x000f92000000ffff # 16bit data selector
+
 .globl start_32bit
 start_32bit:
     mov $0x10, %ax
@@ -125,13 +127,47 @@ start_32bit:
     mov %ax, %es
     mov %ax, %ss
 
+    mov $EARLY_GDT_DESCRIPTOR, %edi
+    mov $0x37, %ax
+    mov %ax, (%edi)
+
+    mov $EARLY_GDT, %eax
+    mov %eax, 2(%edi)
+
+    # fill in early kernel GDT
+    xchg %eax, %edi
+    xor %eax, %eax
+    mov $2, %ecx
+
+    # null segment
+    rep stosl
+
+    # other data
+    mov $.Lgdt_data, %esi
+    mov $12, %ecx
+
+    rep movsl
+
+    lgdt EARLY_GDT_DESCRIPTOR
+    ljmp $0x18, $2f
+
+2:
+    mov $0x20, %ax
+    mov %ax, %ds
+    mov %ax, %es
+    mov %ax, %ss
+
+    # temporary kernel stack
+    mov $0x1000, %esp
+
     # read kimage into memory
 	lea -16(%esp), %esp
     mov $KIMAGE_32K_COUNT, %ecx
+    shl $1, %ecx
     movl $KERNEL_IMAGE_PADDR, 4(%esp) # destination address
 	movl $9, (%esp) # LBA
 
-.Lread_kimage:
+2:
 	mov (%esp), %edi
 	mov 4(%esp), %esi
 
@@ -139,10 +175,10 @@ start_32bit:
     call read_disk
 	mov %ebx, %ecx
 
-    addl $0x8000, 4(%esp)
-	addl $64, (%esp)
+    addl $0x4000, 4(%esp)
+	addl $32, (%esp)
 
-    loop .Lread_kimage
+    loop 2b
 
 	lea 16(%esp), %esp
 
@@ -150,7 +186,7 @@ start_32bit:
     xor %eax, %eax
 
     # clear paging structures
-    mov $0x2000, %edi
+    mov $0x1000, %edi
     mov $0x6000, %ecx
     shr $2, %ecx # %ecx /= 4
     rep stosl
@@ -180,12 +216,12 @@ start_32bit:
     or $PA_PS, %ebx
     mov $256, %ecx
     xor %esi, %esi
-.Lfill1:
+2:
     call fill_pxe
     lea 8(%edi), %edi
     add $0x40000000, %esi # 1GB
     adc $0, %edx
-    loop .Lfill1
+    loop 2b
 
     mov $(PA_NXE >> 32), %edx
 
@@ -193,12 +229,12 @@ start_32bit:
     or $(PA_PCD | PA_PWT), %ebx
     mov $256, %ecx
     xor %esi, %esi
-.Lfill2:
+2:
     call fill_pxe
     lea 8(%edi), %edi
     add $0x40000000, %esi # 1GB
     adc $0, %edx
-    loop .Lfill2
+    loop 2b
 
     xor %edx, %edx
 
@@ -210,9 +246,14 @@ start_32bit:
     and $(~(PA_PCD | PA_PWT | PA_PS)), %ebx
     call fill_pxe
 
-    # PDPTE 0xff8
+    # PDPTE 0x008
     mov $KERNEL_PDPT_KERNEL_SPACE, %edi
-    lea 0xff8(%edi), %edi
+    lea 0x8(%edi), %edi
+    mov $KERNEL_PD_STRUCT_PAGE_ARR, %esi
+    call fill_pxe
+
+    # PDPTE 0xff8
+    lea 0xff0(%edi), %edi
     mov $KERNEL_PD_KIMAGE, %esi
     call fill_pxe
 
@@ -228,11 +269,11 @@ start_32bit:
 
     mov $KIMAGE_PAGES, %ecx
 
-.Lfill3:
+2:
     call fill_pxe
     lea 8(%edi), %edi
     lea 0x1000(%esi), %esi
-    loop .Lfill3
+    loop 2b
 
     # set msr
     mov $0xc0000080, %ecx
@@ -254,29 +295,7 @@ start_32bit:
     or $0x80010001, %eax
     mov %eax, %cr0
 
-    # create gdt
-    xor %eax, %eax # at 0x0000
-    mov %eax, 0x00(%eax)
-    mov %eax, 0x04(%eax) # null descriptor
-    mov %eax, 0x08(%eax) # code segment lower
-    mov %eax, 0x10(%eax) # data segment lower
-    mov $0x00209a00, %ecx
-    mov %ecx, 0x0c(%eax) # code segment higher
-    mov $0x00009200, %ecx
-    mov %ecx, 0x14(%eax) # data segment higher
-
-    # gdt descriptor
-    push %eax
-    push %eax
-
-    # pad with a word
-    mov $0x00170000, %eax
-    push %eax
-
-    lgdt 2(%esp)
-    add $12, %esp
-
-    ljmp $0x08, $.L64bit_entry
+    ljmp $0x08, $2f
 
 # %ebx: attribute low
 # %edx: attribute high
@@ -290,34 +309,31 @@ fill_pxe:
     ret
 
 .code64
-.L64bit_entry:
-    jmp start_64bit
+2:
+    jmp 2f
 
 .section .text
-start_64bit:
-    # We map the first 1GB identically to the first 1GB of physical memory,
-    # move sp to the correct position in identically mapped area of kernel space.
-    mov %rsp, %rdi
-    xor %rsp, %rsp
-    inc %rsp
-    neg %rsp
-    shr $40, %rsp
-    shl $40, %rsp
-
-    add %rdi, %rsp
-    mov %rsp, %rdi
-
-    # make stack frame
-    lea -16(%rsp), %rsp
-    mov %rsp, %rbp
-
-    xor %rax, %rax
-    mov %rax, (%rsp)
-    mov %rax, 8(%rsp)
-
-    call kernel_init
-
-.L64bit_hlt:
-    cli
-    hlt
-    jmp .L64bit_hlt
+2:
+    mov $0x10, %ax
+    mov %ax, %ds
+    mov %ax, %es
+    mov %ax, %ss
+
+    # load the base address of the identity-mapped physical memory area
+    mov $0xffffff, %rax
+    shl $40, %rax
+
+    # place the stack at physical address 0x80000
+    mov $0x80000, %rsp
+    add %rax, %rsp
+
+    # clear the previous stack frame base, setting the return address to 0
+    xor %rbp, %rbp
+    push %rbp
+
+    # argument 1: pointer to the bootloader data (its physical address plus the kernel space offset)
+    mov $E820_MEM_MAP_DATA, %rdi
+    add %rax, %rdi
+
+    # we use jmp instead of call since we've set the return address above
+    jmp _kernel_init
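
The constants in this stub line up with the linker script: `0xffffff << 40` is the base of the `PHYMEM` region (`org = 0xffffff0000000000` in `src/kernel.ld`), through which all physical memory is mapped, and the early stack sits at physical `0x80000` viewed through that window. The arithmetic, as a quick hedged check:

```rust
// Sanity check of the entry stub's constants (values from src/kernel.ld).
const PHYS_MAP_BASE: u64 = 0xffffff << 40; // mov $0xffffff, %rax; shl $40, %rax
const EARLY_STACK_PADDR: u64 = 0x80000;

const _: () = assert!(PHYS_MAP_BASE == 0xffff_ff00_0000_0000);
const _: () = assert!(PHYS_MAP_BASE + EARLY_STACK_PADDR == 0xffff_ff00_0008_0000);
```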

+ 35 - 3
src/kernel.ld

@@ -2,8 +2,10 @@ OUTPUT_FORMAT(elf64-x86-64)
 
 MEMORY
 {
+    LOWDATA       (wx) : org = 0x0500, l = 9 * 256
     MBR           (wx) : org = 0x0e00, l = 512
-    STAGE1        (wx) : org = 0x1000, l = 4K
+    PAGETABLES    (wx) : org = 0x1000, l = 6 * 4K
+    STAGE1        (wx) : org = 0x7000, l = 4K
     PHYMEM        (w)  : org = 0xffffff0000000000, len = 512 * 1024M
     PARRAY        (w)  : org = 0xffffff8000000000, len = 128 * 1024M
     KBSS          (w)  : org = 0xffffffffc0200000, len = 2M
@@ -13,12 +15,42 @@ MEMORY
 
 SECTIONS
 {
+    .low (NOLOAD) :
+    {
+        . += 8; /* skip 0x0 address */
+
+        EARLY_GDT = .;
+        . += 8; /* null descriptor */
+        . += 8; /* 64bit code descriptor */
+        . += 8; /* 64bit data descriptor */
+        . += 8; /* 32bit code descriptor */
+        . += 8; /* 32bit data descriptor */
+        . += 8; /* 16bit code descriptor */
+        . += 8; /* 16bit data descriptor */
+
+        . = ALIGN(16);
+        . += 2;
+        EARLY_GDT_DESCRIPTOR = .;
+
+        . += 6; /* size and base */
+
+        . = ALIGN(16);
+        . += 2;
+        BIOS_IDT_DESCRIPTOR = .;
+
+        . += 6; /* size and base */
+
+        . = ALIGN(16);
+        E820_MEM_MAP_DATA = .;
+
+        . += 1024;
+    } > LOWDATA
+
     .mbr : AT(0)
     {
         KEEP(*(.mbr));
 
-        . = 446;
-        BYTE(0x00);
+        . = 446; /* avoid the MBR being overwritten */
 
         . = 510;
         BYTE(0x55);
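
The `.low` section reserves exactly seven 8-byte descriptors for `EARLY_GDT`, which is why `start_32bit` in `src/boot.s` above stores `0x37` into `EARLY_GDT_DESCRIPTOR`: a GDT limit is the table size in bytes minus one. Sketch of the arithmetic:

```rust
// Hedged check of the early GDT descriptor limit written in boot.s.
const EARLY_GDT_ENTRIES: u64 = 7; // null + 64/32/16-bit code and data pairs
const EARLY_GDT_LIMIT: u64 = EARLY_GDT_ENTRIES * 8 - 1;

const _: () = assert!(EARLY_GDT_LIMIT == 0x37); // matches `mov $0x37, %ax`
```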

+ 1 - 1
src/kernel/cpu.rs

@@ -17,7 +17,7 @@ pub unsafe fn local_cpu() -> Pin<&'static mut CPU> {
 pub fn percpu_allocate(layout: Layout) -> NonNull<u8> {
     // TODO: Use page size defined in `arch`.
     let page_count = layout.size().div_ceil(arch::PAGE_SIZE);
-    let page = Page::alloc_at_least_in(page_count, GlobalPageAlloc::buddy_alloc());
+    let page = Page::alloc_at_least_in(page_count, GlobalPageAlloc::early_alloc());
     let page_data = page.as_memblk().as_byte_ptr();
     core::mem::forget(page);
 

+ 2 - 2
src/kernel/mem.rs

@@ -6,8 +6,8 @@ mod mm_area;
 mod mm_list;
 mod page_alloc;
 
-pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
+pub use access::{AsMemoryBlock, KernelPageAccess, MemoryBlock, PhysAccess};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_page_fault, FileMapping, MMList, Mapping, Permission};
-pub use page_alloc::GlobalPageAlloc;
+pub use page_alloc::{GlobalPageAlloc, RawPage};
 pub use paging::{Page, PageBuffer};

+ 5 - 11
src/kernel/mem/mm_area.rs

@@ -81,10 +81,7 @@ impl MMArea {
 
     /// # Return
     /// Whether the whole handling process is done.
-    pub fn handle_cow<E>(&self, pte: &mut E) -> bool
-    where
-        E: PTE,
-    {
+    pub fn handle_cow(&self, pte: &mut impl PTE) -> bool {
         let mut page_attr = pte.get_attr().as_page_attr().expect("Not a page attribute");
         let pfn = pte.get_pfn();
 
@@ -96,7 +93,7 @@ impl MMArea {
             // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock
             // held, there couldn't be neither other processes sharing the page, nor other
             // threads making the page COW at the same time.
-            pte.set_attr(E::Attr::from_page_attr(page_attr));
+            pte.set_attr(page_attr.into());
             core::mem::forget(page);
             return true;
         }
@@ -120,17 +117,14 @@ impl MMArea {
 
         page_attr.remove(PageAttribute::ACCESSED);
 
-        pte.set(new_page.into_raw(), E::Attr::from_page_attr(page_attr));
+        pte.set(new_page.into_raw(), page_attr.into());
 
         false
     }
 
     /// # Arguments
     /// * `offset`: The offset from the start of the mapping, aligned to 4KB boundary.
-    pub fn handle_mmap<E>(&self, pte: &mut E, offset: usize) -> KResult<()>
-    where
-        E: PTE,
-    {
+    pub fn handle_mmap(&self, pte: &mut impl PTE, offset: usize) -> KResult<()> {
         // TODO: Implement shared mapping
         let mut page_attr = pte.get_attr().as_page_attr().expect("Not a page attribute");
         let pfn = pte.get_pfn();
@@ -164,7 +158,7 @@ impl MMArea {
         page_attr.insert(PageAttribute::PRESENT);
         page_attr.remove(PageAttribute::MAPPED);
 
-        pte.set_attr(E::Attr::from_page_attr(page_attr));
+        pte.set_attr(page_attr.into());
         Ok(())
     }
 

+ 4 - 7
src/kernel/mem/mm_list.rs

@@ -590,7 +590,7 @@ where
         let mut attr = PageAttribute::PRESENT | PageAttribute::USER | PageAttribute::COPY_ON_WRITE;
         attr.set(PageAttribute::EXECUTE, execute);
 
-        self.set(EMPTY_PAGE.clone().into_raw(), T::Attr::from_page_attr(attr));
+        self.set(EMPTY_PAGE.clone().into_raw(), T::Attr::from(attr));
     }
 
     fn set_mapped(&mut self, execute: bool) {
@@ -599,7 +599,7 @@ where
         let mut attr = PageAttribute::MAPPED | PageAttribute::USER | PageAttribute::COPY_ON_WRITE;
         attr.set(PageAttribute::EXECUTE, execute);
 
-        self.set(EMPTY_PAGE.clone().into_raw(), T::Attr::from_page_attr(attr));
+        self.set(EMPTY_PAGE.clone().into_raw(), T::Attr::from(attr));
     }
 
     fn set_copy_on_write(&mut self, from: &mut Self) {
@@ -620,11 +620,8 @@ where
             Page::with_raw(from.get_pfn(), |page| page.clone().into_raw())
         };
 
-        self.set(
-            pfn,
-            T::Attr::from_page_attr(from_attr & !PageAttribute::ACCESSED),
-        );
+        self.set(pfn, T::Attr::from(from_attr & !PageAttribute::ACCESSED));
 
-        from.set_attr(T::Attr::from_page_attr(from_attr));
+        from.set_attr(T::Attr::from(from_attr));
     }
 }

+ 51 - 40
src/kernel/mem/page_alloc.rs

@@ -4,12 +4,14 @@ use super::{paging::AllocZeroed as _, Page};
 use buddy_allocator::{BuddyAllocator, BuddyRawPage as _};
 use core::{ptr::NonNull, sync::atomic::Ordering};
 use eonix_mm::{
-    address::{AddrOps as _, PAddr},
+    address::{AddrOps as _, PAddr, PRange},
     paging::{GlobalPageAlloc as GlobalPageAllocTrait, PageAlloc, PFN},
 };
-use eonix_sync::Spin;
+use eonix_sync::{NoContext, Spin};
 use intrusive_list::List;
-use raw_page::{PageFlags, RawPage, RawPagePtr};
+use raw_page::{PageFlags, RawPagePtr};
+
+pub use raw_page::RawPage;
 
 const COSTLY_ORDER: u32 = 3;
 const BATCH_SIZE: u32 = 64;
@@ -20,13 +22,15 @@ static BUDDY_ALLOC: Spin<BuddyAllocator<RawPagePtr>> = Spin::new(BuddyAllocator:
 static PERCPU_PAGE_ALLOC: PerCpuPageAlloc = PerCpuPageAlloc::new();
 
 #[derive(Clone)]
-pub struct NoAlloc;
+pub struct GlobalPageAlloc;
 
 #[derive(Clone)]
-pub struct GlobalPageAlloc;
+pub struct BuddyPageAlloc();
 
+/// Allocator that takes pages from the buddy allocator during the early
+/// stage of the kernel, when preemption is disabled and not yet functioning.
 #[derive(Clone)]
-pub struct BuddyPageAlloc;
+pub struct EarlyPageAlloc();
 
 struct PerCpuPageAlloc {
     batch: u32,
@@ -86,8 +90,37 @@ impl PerCpuPageAlloc {
 }
 
 impl GlobalPageAlloc {
+    #[allow(dead_code)]
     pub const fn buddy_alloc() -> BuddyPageAlloc {
-        BuddyPageAlloc
+        BuddyPageAlloc()
+    }
+
+    pub const fn early_alloc() -> EarlyPageAlloc {
+        EarlyPageAlloc()
+    }
+
+    pub fn mark_present(range: PRange) {
+        let mut pfn = PFN::from(range.start().ceil());
+        let end_pfn = PFN::from(range.end().floor());
+
+        while pfn < end_pfn {
+            RawPagePtr::from(pfn).flags().set(PageFlags::PRESENT);
+            pfn = pfn + 1;
+        }
+    }
+
+    /// Add the pages in the PAddr range `range` to the global allocator.
+    ///
+    /// This function is only to be called during system initialization, when
+    /// `eonix_preempt` is not yet functioning due to the absence of the percpu area.
+    ///
+    /// # Safety
+    /// This function is unsafe because calling it from a preemptible context
+    /// might deadlock.
+    pub unsafe fn add_pages(range: PRange) {
+        BUDDY_ALLOC
+            .lock_with_context(NoContext)
+            .create_pages(range.start(), range.end())
     }
 }
 
@@ -126,34 +159,12 @@ impl PageAlloc for GlobalPageAlloc {
     }
 }
 
-impl PageAlloc for NoAlloc {
-    type RawPage = RawPagePtr;
-
-    fn alloc_order(&self, _order: u32) -> Option<RawPagePtr> {
-        panic!("NoAlloc cannot allocate pages");
-    }
-
-    unsafe fn dealloc(&self, _: RawPagePtr) {
-        panic!("NoAlloc cannot deallocate pages");
-    }
-
-    fn has_management_over(&self, _: RawPagePtr) -> bool {
-        true
-    }
-}
-
 impl GlobalPageAllocTrait for GlobalPageAlloc {
     fn global() -> Self {
         GlobalPageAlloc
     }
 }
 
-impl GlobalPageAllocTrait for NoAlloc {
-    fn global() -> Self {
-        NoAlloc
-    }
-}
-
 impl PageAlloc for BuddyPageAlloc {
     type RawPage = RawPagePtr;
 
@@ -170,20 +181,20 @@ impl PageAlloc for BuddyPageAlloc {
     }
 }
 
-#[no_mangle]
-pub extern "C" fn mark_present(start: usize, end: usize) {
-    let mut start_pfn = PFN::from(PAddr::from(start).ceil());
-    let end_pfn = PFN::from(PAddr::from(end).floor());
+impl PageAlloc for EarlyPageAlloc {
+    type RawPage = RawPagePtr;
 
-    while start_pfn < end_pfn {
-        RawPagePtr::from(start_pfn).flags().set(PageFlags::PRESENT);
-        start_pfn = start_pfn + 1;
+    fn alloc_order(&self, order: u32) -> Option<Self::RawPage> {
+        BUDDY_ALLOC.lock_with_context(NoContext).alloc_order(order)
     }
-}
 
-#[no_mangle]
-pub extern "C" fn create_pages(start: PAddr, end: PAddr) {
-    BUDDY_ALLOC.lock().create_pages(start, end);
+    unsafe fn dealloc(&self, raw_page: Self::RawPage) {
+        BUDDY_ALLOC.lock_with_context(NoContext).dealloc(raw_page);
+    }
+
+    fn has_management_over(&self, page_ptr: Self::RawPage) -> bool {
+        BuddyAllocator::has_management_over(page_ptr)
+    }
 }
 
 #[no_mangle]
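
`EarlyPageAlloc` takes the buddy lock with `NoContext` because, before the percpu area exists, `eonix_preempt`-aware lock guards cannot run; that is only sound while a single CPU runs with interrupts off. A hedged sketch of the intended early-boot use, mirroring `src/kernel_init.rs`:

```rust
use eonix_mm::paging::Page as GenericPage;

// Hypothetical wrapper: allocate the 2 MiB (order-9) early kernel stack
// through the buddy allocator without preemption bookkeeping.
fn alloc_early_kernel_stack() -> GenericPage<EarlyPageAlloc> {
    GenericPage::alloc_order_in(9, GlobalPageAlloc::early_alloc())
}
```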

+ 2 - 6
src/kernel/mem/paging.rs

@@ -1,10 +1,6 @@
-use super::{
-    access::AsMemoryBlock,
-    page_alloc::{GlobalPageAlloc, NoAlloc},
-    MemoryBlock, PhysAccess,
-};
+use super::{access::AsMemoryBlock, page_alloc::GlobalPageAlloc, MemoryBlock, PhysAccess};
 use crate::io::{Buffer, FillResult};
-use eonix_mm::paging::{Page as GenericPage, PageAlloc};
+use eonix_mm::paging::{NoAlloc, Page as GenericPage, PageAlloc};
 
 pub type PageUnmanaged = GenericPage<NoAlloc>;
 pub type Page = GenericPage<GlobalPageAlloc>;

+ 226 - 0
src/kernel_init.rs

@@ -0,0 +1,226 @@
+use crate::{
+    kernel::{
+        self,
+        cpu::init_localcpu,
+        mem::{AsMemoryBlock, GlobalPageAlloc, KernelPageAccess, RawPage},
+    },
+    kernel_init,
+};
+use arch::DefaultPagingMode;
+use eonix_mm::{
+    address::{Addr as _, AddrOps as _, PAddr, PRange, VAddr, VRange},
+    page_table::{PageAttribute, PagingMode as _, PTE},
+    paging::{NoAlloc, Page as GenericPage, PAGE_SIZE, PFN},
+};
+use eonix_runtime::context::ExecutionContext;
+use eonix_sync::LazyLock;
+
+static GLOBAL_PAGE_TABLE: LazyLock<
+    eonix_mm::page_table::PageTable<DefaultPagingMode, NoAlloc, KernelPageAccess>,
+> = LazyLock::new(|| unsafe {
+    GenericPage::with_raw(
+        DefaultPagingMode::KERNEL_ROOT_TABLE_PFN,
+        |root_table_page| eonix_mm::page_table::PageTable::with_root_table(root_table_page.clone()),
+    )
+});
+
+const HUGE_PAGE_LEN: usize = 1 << 21;
+
+const P_KERNEL_BSS_START: PAddr = PAddr::from_val(0x200000);
+const P_KIMAGE_START: PAddr = PAddr::from_val(0x400000);
+
+const V_KERNEL_PAGE_ARRAY_START: VAddr = VAddr::from(0xffffff8040000000);
+const V_KERNEL_BSS_START: VAddr = VAddr::from(0xffffffffc0200000);
+const KERNEL_BSS_LEN: usize = HUGE_PAGE_LEN;
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+struct E820MemMapEntry {
+    base: u64,
+    len: u64,
+    entry_type: u32,
+    acpi_attrs: u32,
+}
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+struct BootLoaderData {
+    entry_count: u32,
+    entry_length: u32,
+
+    block_count_1k: u32,
+    block_count_64k: u32,
+
+    all_entries: [E820MemMapEntry; 42],
+}
+
+impl E820MemMapEntry {
+    const ENTRY_FREE: u32 = 1;
+    // const ENTRY_USED: u32 = 2;
+
+    fn is_free(&self) -> bool {
+        self.entry_type == Self::ENTRY_FREE
+    }
+
+    // fn is_used(&self) -> bool {
+    //     self.entry_type == Self::ENTRY_USED
+    // }
+
+    fn range(&self) -> PRange {
+        PRange::from(PAddr::from(self.base as usize)).grow(self.len as usize)
+    }
+}
+
+impl BootLoaderData {
+    // fn memory_size(&self) -> usize {
+    //     // The initial 1M is not counted in the E820 map. We add them to the total as well.
+    //     ((self.block_count_1k + 64 * self.block_count_64k) * 1024 + 1 * 1024 * 1024) as usize
+    // }
+
+    fn entries(&self) -> &[E820MemMapEntry] {
+        &self.all_entries[..self.entry_count as usize]
+    }
+
+    fn free_entries(&self) -> impl Iterator<Item = &E820MemMapEntry> {
+        self.entries().iter().filter(|entry| entry.is_free())
+    }
+}
+
+#[no_mangle]
+pub(self) extern "C" fn _kernel_init(bootloader_data: &mut BootLoaderData) -> ! {
+    // Map kernel BSS
+    for pte in GLOBAL_PAGE_TABLE.iter_kernel_levels(
+        VRange::from(V_KERNEL_BSS_START).grow(KERNEL_BSS_LEN),
+        &DefaultPagingMode::LEVELS[..3],
+    ) {
+        let attr = PageAttribute::PRESENT
+            | PageAttribute::WRITE
+            | PageAttribute::READ
+            | PageAttribute::HUGE
+            | PageAttribute::GLOBAL;
+
+        pte.set(PFN::from(P_KERNEL_BSS_START), attr.into());
+    }
+
+    unsafe {
+        // SAFETY: We've just mapped the area with sufficient length.
+        core::ptr::write_bytes(V_KERNEL_BSS_START.addr() as *mut (), 0, KERNEL_BSS_LEN);
+    }
+
+    let addr_max = bootloader_data
+        .free_entries()
+        .map(|entry| entry.range().end())
+        .max()
+        .expect("No free memory");
+
+    let pfn_max = PFN::from(addr_max.ceil());
+    let len_bytes_page_array = usize::from(pfn_max) * size_of::<RawPage>();
+
+    let count_huge_pages = len_bytes_page_array.div_ceil(HUGE_PAGE_LEN);
+
+    extern "C" {
+        // Definition inside linker script.
+        fn KIMAGE_PAGES();
+    }
+
+    let kimage_pages = unsafe { core::mem::transmute::<_, usize>(KIMAGE_PAGES as *const ()) };
+
+    let paddr_after_kimage = P_KIMAGE_START + kimage_pages * PAGE_SIZE;
+    let paddr_after_kimage_aligned = paddr_after_kimage.ceil_to(HUGE_PAGE_LEN);
+
+    let mut paddr_free = paddr_after_kimage_aligned;
+
+    // Map kernel page array.
+    for pte in GLOBAL_PAGE_TABLE.iter_kernel_levels(
+        VRange::from(V_KERNEL_PAGE_ARRAY_START).grow(HUGE_PAGE_LEN * count_huge_pages),
+        &DefaultPagingMode::LEVELS[..3],
+    ) {
+        let attr = PageAttribute::PRESENT
+            | PageAttribute::WRITE
+            | PageAttribute::READ
+            | PageAttribute::HUGE
+            | PageAttribute::GLOBAL;
+
+        pte.set(PFN::from(paddr_free), attr.into());
+
+        paddr_free = paddr_free + HUGE_PAGE_LEN;
+    }
+
+    unsafe {
+        // SAFETY: We've just mapped the area with sufficient length.
+        core::ptr::write_bytes(
+            V_KERNEL_PAGE_ARRAY_START.addr() as *mut (),
+            0,
+            count_huge_pages * HUGE_PAGE_LEN,
+        );
+    }
+
+    let paddr_unused_start = paddr_free;
+
+    for entry in bootloader_data.free_entries() {
+        let mut range = entry.range();
+
+        GlobalPageAlloc::mark_present(range);
+
+        if range.end() <= paddr_unused_start {
+            continue;
+        }
+
+        if range.start() < paddr_unused_start {
+            let (_, right) = range.split_at(paddr_unused_start);
+            range = right;
+        }
+
+        unsafe {
+            // SAFETY: We are in system initialization procedure where preemption is disabled.
+            GlobalPageAlloc::add_pages(range);
+        }
+    }
+
+    unsafe {
+        // SAFETY: We are in system initialization procedure where preemption is disabled.
+        GlobalPageAlloc::add_pages(PRange::new(PAddr::from(0x100000), PAddr::from(0x200000)));
+        GlobalPageAlloc::add_pages(PRange::new(paddr_after_kimage, paddr_after_kimage_aligned));
+    }
+
+    let (stack_bottom_addr, stack_pfn) = {
+        let kernel_stack_page = GenericPage::alloc_order_in(9, GlobalPageAlloc::early_alloc());
+        let stack_area = kernel_stack_page.as_memblk();
+
+        let stack_bottom_addr = stack_area
+            .addr()
+            .checked_add(stack_area.len())
+            .expect("The stack bottom should not be null");
+
+        let stack_pfn = kernel_stack_page.into_raw();
+
+        (stack_bottom_addr, stack_pfn)
+    };
+
+    let mut to_ctx = ExecutionContext::new();
+    to_ctx.set_interrupt(false);
+    to_ctx.set_sp(stack_bottom_addr.get());
+    to_ctx.call1(_init_on_new_stack, usize::from(stack_pfn));
+
+    to_ctx.switch_noreturn();
+}
+
+extern "C" fn _init_on_new_stack(early_kernel_stack_pfn: PFN) -> ! {
+    // Add the pages previously used by `_kernel_init` as a stack.
+    unsafe {
+        // SAFETY: We are in system initialization procedure where preemption is disabled.
+        GlobalPageAlloc::add_pages(PRange::new(PAddr::from(0x8000), PAddr::from(0x80000)));
+    }
+
+    init_localcpu();
+
+    extern "C" {
+        fn init_allocator();
+    }
+
+    unsafe { init_allocator() };
+
+    kernel::interrupt::init().unwrap();
+
+    kernel_init(early_kernel_stack_pfn)
+}

+ 0 - 154
src/kinit.cpp

@@ -1,154 +0,0 @@
-#include <stdint.h>
-#include <sys/utsname.h>
-
-#include <types/allocator.hpp>
-#include <types/types.h>
-
-#include <kernel/hw/acpi.hpp>
-#include <kernel/hw/pci.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/mem/phys.hpp>
-#include <kernel/mem/types.hpp>
-#include <kernel/utsname.hpp>
-
-using constructor = void (*)();
-extern "C" constructor const start_ctors, end_ctors;
-extern "C" uint64_t BSS_ADDR, BSS_LENGTH;
-
-struct PACKED bootloader_data {
-    uint32_t meminfo_entry_count;
-    uint32_t meminfo_entry_length;
-
-    // don't forget to add the initial 1m to the total
-    uint32_t meminfo_1k_blocks;
-    uint32_t meminfo_64k_blocks;
-
-    // meminfo entries
-    kernel::mem::e820_mem_map_entry meminfo_entries[(1024 - 4 * 4) / 24];
-};
-
-namespace kernel::kinit {
-
-static inline void setup_early_kernel_page_table() {
-    using namespace kernel::mem::paging;
-
-    constexpr auto idx = idx_all(0xffffffffc0200000ULL);
-
-    auto pdpt = KERNEL_PAGE_TABLE[std::get<1>(idx)].parse();
-    auto pd = pdpt[std::get<2>(idx)].parse();
-
-    // kernel bss, size 2M
-    pd[std::get<3>(idx)].set(PA_KERNEL_DATA_HUGE, KERNEL_BSS_HUGE_PAGE);
-
-    // clear kernel bss
-    memset((void*)BSS_ADDR, 0x00, BSS_LENGTH);
-}
-
-extern "C" char KIMAGE_PAGES[];
-extern "C" void create_pages(uintptr_t start, uintptr_t end);
-extern "C" void mark_present(uintptr_t start, uintptr_t end);
-
-static inline void setup_buddy(uintptr_t addr_max) {
-    using namespace kernel::mem;
-    using namespace kernel::mem::paging;
-    constexpr auto idx = idx_all(0xffffff8040000000ULL);
-
-    addr_max += 0xfff;
-    addr_max >>= 12;
-    int count = (addr_max * sizeof(page) + 0x200000 - 1) / 0x200000;
-
-    auto KIMAGE_PAGES_VALUE = (size_t)KIMAGE_PAGES;
-    pfn_t real_start_pfn = KERNEL_IMAGE_PADDR + KIMAGE_PAGES_VALUE * 0x1000;
-    pfn_t aligned_start_pfn = real_start_pfn + 0x200000 - 1;
-    aligned_start_pfn &= ~0x1fffff;
-
-    pfn_t saved_start_pfn = aligned_start_pfn;
-
-    memset(physaddr<void>{KERNEL_PD_STRUCT_PAGE_ARR}, 0x00, 4096);
-
-    auto pdpte = KERNEL_PAGE_TABLE[std::get<1>(idx)].parse()[std::get<2>(idx)];
-    pdpte.set(PA_KERNEL_PAGE_TABLE, KERNEL_PD_STRUCT_PAGE_ARR);
-
-    auto pd = pdpte.parse();
-    for (int i = 0; i < count; ++i, aligned_start_pfn += 0x200000)
-        pd[std::get<3>(idx) + i].set(PA_KERNEL_DATA_HUGE, aligned_start_pfn);
-
-    PAGE_ARRAY = (page*)0xffffff8040000000ULL;
-    memset(PAGE_ARRAY, 0x00, addr_max * sizeof(page));
-
-    for (int i = 0; i < (int)info::e820_entry_count; ++i) {
-        auto& ent = info::e820_entries[i];
-
-        if (ent.type != 1) // type == 1: free area
-            continue;
-        mark_present(ent.base, ent.base + ent.len);
-
-        auto start = ent.base;
-        auto end = start + ent.len;
-        if (end <= aligned_start_pfn)
-            continue;
-
-        if (start < aligned_start_pfn)
-            start = aligned_start_pfn;
-
-        if (start > end)
-            continue;
-
-        create_pages(start, end);
-    }
-
-    // unused space
-    create_pages(0x9000, 0x80000);
-    create_pages(0x100000, 0x200000);
-    create_pages(real_start_pfn, saved_start_pfn);
-}
-
-static inline void save_memory_info(bootloader_data* data) {
-    kernel::mem::info::memory_size = 1ULL * 1024ULL * 1024ULL + // initial 1M
-                                     1024ULL * data->meminfo_1k_blocks +
-                                     64ULL * 1024ULL * data->meminfo_64k_blocks;
-    kernel::mem::info::e820_entry_count = data->meminfo_entry_count;
-    kernel::mem::info::e820_entry_length = data->meminfo_entry_length;
-
-    memcpy(kernel::mem::info::e820_entries, data->meminfo_entries,
-           sizeof(kernel::mem::info::e820_entries));
-}
-
-extern "C" void rust_kinit(uintptr_t early_kstack_vaddr);
-
-extern "C" void NORETURN kernel_init(bootloader_data* data) {
-    setup_early_kernel_page_table();
-    save_memory_info(data);
-
-    uintptr_t addr_max = 0;
-    for (int i = 0; i < (int)kernel::mem::info::e820_entry_count; ++i) {
-        auto& ent = kernel::mem::info::e820_entries[i];
-        if (ent.type != 1)
-            continue;
-        addr_max = std::max(addr_max, ent.base + ent.len);
-    }
-
-    setup_buddy(addr_max);
-
-    using namespace mem::paging;
-    auto kernel_stack_pfn = page_to_pfn(c_alloc_pages(9)) << 12;
-    auto kernel_stack_ptr = mem::physaddr<std::byte>{kernel_stack_pfn} + (1 << 9) * 0x1000;
-
-    asm volatile(
-        "mov %1, %%rdi\n\t"
-        "lea -8(%2), %%rsp\n\t"
-        "xor %%rbp, %%rbp\n\t"
-        "mov %%rbp, (%%rsp)\n\t" // Clear previous frame pointer
-        "jmp *%0\n\t"
-        :
-        : "r"(rust_kinit), "g"(kernel_stack_pfn), "r"(kernel_stack_ptr)
-        : "memory");
-
-    for (;;)
-        asm volatile(
-            "cli\n\t"
-            "hlt\n\t");
-}
-
-} // namespace kernel::kinit

+ 7 - 17
src/lib.rs

@@ -18,6 +18,7 @@ mod fs;
 mod hash;
 mod io;
 mod kernel;
+mod kernel_init;
 mod net;
 mod path;
 mod prelude;
@@ -27,10 +28,9 @@ mod sync;
 use alloc::{ffi::CString, sync::Arc};
 use core::alloc::{GlobalAlloc, Layout};
 use elf::ParsedElf32;
-use eonix_mm::{address::PAddr, paging::PFN};
+use eonix_mm::paging::PFN;
 use eonix_runtime::{run::FutureRun, scheduler::Scheduler, task::Task};
 use kernel::{
-    cpu::init_localcpu,
     mem::Page,
     task::{KernelStack, ProcessBuilder, ProcessList, ThreadBuilder, ThreadRunnable},
     vfs::{
@@ -64,7 +64,6 @@ fn panic(info: &core::panic::PanicInfo) -> ! {
 extern "C" {
     fn _do_allocate(size: usize) -> *mut core::ffi::c_void;
     fn _do_deallocate(ptr: *mut core::ffi::c_void, size: core::ffi::c_size_t) -> i32;
-    fn init_pci();
 }
 
 struct Allocator;
@@ -90,19 +89,11 @@ unsafe impl GlobalAlloc for Allocator {
 #[global_allocator]
 static ALLOCATOR: Allocator = Allocator;
 
-extern "C" {
-    fn init_allocator();
-}
-
 #[no_mangle]
-pub extern "C" fn rust_kinit(early_kstack_paddr: PAddr) -> ! {
-    // We don't call global constructors.
-    // Rust doesn't need that, and we're not going to use global variables in C++.
-    init_localcpu();
-
-    unsafe { init_allocator() };
-
-    kernel::interrupt::init().unwrap();
+pub extern "C" fn kernel_init(early_kstack_pfn: PFN) -> ! {
+    extern "C" {
+        fn init_pci();
+    }
 
     // TODO: Move this to rust.
     unsafe { init_pci() };
@@ -114,8 +105,7 @@ pub extern "C" fn rust_kinit(early_kstack_paddr: PAddr) -> ! {
     // So call `init_vfs` first, then `init_multitasking`.
     Scheduler::init_local_scheduler::<KernelStack>();
 
-    Scheduler::get()
-        .spawn::<KernelStack, _>(FutureRun::new(init_process(PFN::from(early_kstack_paddr))));
+    Scheduler::get().spawn::<KernelStack, _>(FutureRun::new(init_process(early_kstack_pfn)));
 
     unsafe {
         // SAFETY: `preempt::count()` == 1.

+ 26 - 25
src/mbr.S

@@ -7,17 +7,16 @@ move_mbr:
     mov %ax, %es
     mov %ax, %ss
 
-    # build a temporary stack
-    xor %esp, %esp
-    mov $0x0e00, %sp
-
+    # move the MBR to 0xe00
     mov $128, %cx # 512 bytes
     mov $0x7c00, %si
     mov $0x0e00, %di
     rep movsl
 
-    lgdt .Learly_gdt_descriptor
+    ljmp $0x00, $2f
 
+2:
+    # read the kernel stage1
     mov $.Lread_data_packet, %si
     mov $0x42, %ah
     mov $0x80, %dl
@@ -34,12 +33,12 @@ move_mbr:
     cmp $0x80, %ah # invalid command
     je .Lhalt
 
-    jcxz .Lax
+    jcxz 2f
     mov %cx, %ax
     mov %dx, %bx
 
-.Lax:
-    sub $1024, %esp
+2:
+    mov $E820_MEM_MAP_DATA, %esp
     movzw %ax, %eax
     mov %eax, 8(%esp)  # 1k blocks
     movzw %bx, %ebx
@@ -56,7 +55,7 @@ move_mbr:
     xor %ebx, %ebx
     mov %ebx, (%esp)
 
-.Le820_read_mem_map:
+2:
     # set the magic number to edx
     mov $0x534D4150, %edx
 
@@ -79,12 +78,23 @@ move_mbr:
     cmovnz 4(%esp), %ecx
     mov %ecx, 4(%esp)
 
-    jmp .Le820_read_mem_map
+    jmp 2b
 
 .Lsave_mem_fin:
+    mov $0x3ff, %ax
+    mov $BIOS_IDT_DESCRIPTOR, %di
+    mov %ax, (%di)
+
+    xor %eax, %eax
+    mov %eax, 2(%di)
+
+    lgdt .Learly_gdt_descriptor
+
     cli
-    lidt .Lnull_idt_descriptor
+    # the IDT descriptor is six zero bytes; borrow the null GDT entry
+    lidt .Learly_gdt
 
+    # enable protected mode
     mov %cr0, %eax
     or $1, %eax
     mov %eax, %cr0
@@ -93,30 +103,21 @@ move_mbr:
 
 .Lhalt:
     hlt
-    jmp .Lhalt
+    jmp .
 
 .align 16
 .Learly_gdt:
     .8byte 0x0                # null selector
-    .8byte 0x00cf9a000000ffff # code selector
-    .8byte 0x00cf92000000ffff # data selector
-    .8byte 0x000f9a000000ffff # 16 bit code selector
-    .8byte 0x000f92000000ffff # 16 bit data selector
-
-# null IDT descriptor
-# so that exceptions will cause the system to reset
-.align 4
-.Lnull_idt_descriptor:
-    .word 0 # size
-    .long 0 # base
+    .8byte 0x00cf9a000000ffff # 32bit code selector
+    .8byte 0x00cf92000000ffff # 32bit data selector
 
 .align 4
 .Learly_gdt_descriptor:
-    .word 0x27 # size
+    .word 0x17 # size
     .long .Learly_gdt  # address
 
 .align 16
 .Lread_data_packet:
     .long  0x00080010 # .stage1 takes up 4K, or 8 sectors
-    .long  0x00001000 # read to 0000:1000
+    .long  0x00007000 # read to 0000:7000
     .8byte 1          # read from LBA 1

+ 0 - 4
src/types/libstdcpp.cpp

@@ -2,16 +2,12 @@
 
 #include <types/types.h>
 
-#include <kernel/log.hpp>
-
 extern "C" void NORETURN __stack_chk_fail(void) {
-    assert(false);
     for (;;)
         ;
 }
 
 extern "C" void NORETURN __cxa_pure_virtual(void) {
-    assert(false);
     for (;;)
         ;
 }