1
0

2 Ревизии 1d5525f5c1 ... b67ed10025

Автор SHA1 Съобщение Дата
  greatbridf b67ed10025 feat: load huge kernel, new memory layout преди 2 месеца
  greatbridf b94122a5fb fix(buddy): create_zone with args like 12, 15 преди 2 месеца
променени са 11 файла, в които са добавени 367 реда и са изтрити 234 реда
  1. 1 1
      Cargo.toml
  2. 12 12
      doc/mem_layout.txt
  3. 3 20
      include/kernel/mem/paging.hpp
  4. 27 0
      include/kernel/mem/paging_asm.h
  5. 209 72
      src/boot.s
  6. 13 11
      src/kernel.ld
  7. 40 14
      src/kernel/mem/paging.cc
  8. 3 3
      src/kernel/process.cpp
  9. 2 2
      src/kernel/vfs/ffi.rs
  10. 19 11
      src/kinit.cpp
  11. 38 88
      src/mbr.S

+ 1 - 1
Cargo.toml

@@ -16,10 +16,10 @@ bindgen = "0.70.1"
 
 [profile.dev]
 panic = "abort"
-opt-level = 1
 
 [profile.dev.package."*"]
 opt-level = 2
+debug = false
 
 [profile.dev.build-override]
 opt-level = 0

+ 12 - 12
doc/mem_layout.txt

@@ -1,18 +1,18 @@
 physical memory
 
-0x0000 - 0x1000: GDT, TSS, LDT and some early kernel data
-0x1000 - 0x2000: kernel stage1
-0x2000 - ?     : kernel image
-
-0x100000 - 0x101000 : kernel PML4
-0x101000 - 0x102000 : kernel PDPT for physical memory mappings
-0x102000 - 0x103000 : kernel PDPT for kernel space
-0x103000 - 0x104000 : kernel PD for kernel image
-0x104000 - 0x105000 : kernel PT for kernel image
-0x105000 - 0x106000 : kernel PD for struct page array#1
-
-0x106000 - 0x200000 : unused empty pages
+0x0000 - 0x1000 : GDT, TSS, LDT and some early kernel data
+0x1000 - 0x2000 : kernel stage1
+0x2000 - 0x3000 : kernel PML4
+0x3000 - 0x4000 : kernel PDPT for physical memory mappings
+0x4000 - 0x5000 : kernel PDPT for kernel space
+0x5000 - 0x6000 : kernel PD for kernel image
+0x6000 - 0x7000 : kernel PT for kernel image
+0x7000 - 0x8000 : kernel PD for struct page array#1
+0x8000 - 0x9000 : empty page
+
+0x100000 - 0x200000 : unused
 0x200000 - 0x400000 : first kernel bss page (2MB)
+0x400000 - ?        : kernel image
+?        - ?        : struct page array
 
 
 virtual address space

+ 3 - 20
include/kernel/mem/paging.hpp

@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 
+#include <kernel/mem/paging_asm.h>
 #include <kernel/mem/phys.hpp>
 
 namespace kernel::mem::paging {
@@ -39,21 +40,6 @@ using pfn_t = uintptr_t;
 // paging structure attributes
 using psattr_t = uintptr_t;
 
-constexpr psattr_t PA_P = 0x0000000000000001ULL;
-constexpr psattr_t PA_RW = 0x0000000000000002ULL;
-constexpr psattr_t PA_US = 0x0000000000000004ULL;
-constexpr psattr_t PA_PWT = 0x0000000000000008ULL;
-constexpr psattr_t PA_PCD = 0x0000000000000010ULL;
-constexpr psattr_t PA_A = 0x0000000000000020ULL;
-constexpr psattr_t PA_D = 0x0000000000000040ULL;
-constexpr psattr_t PA_PS = 0x0000000000000080ULL;
-constexpr psattr_t PA_G = 0x0000000000000100ULL;
-constexpr psattr_t PA_COW = 0x0000000000000200ULL;  // copy on write
-constexpr psattr_t PA_MMAP = 0x0000000000000400ULL; // memory mapped
-constexpr psattr_t PA_ANON = 0x0000000000000800ULL; // anonymous map
-constexpr psattr_t PA_NXE = 0x8000000000000000ULL;
-constexpr psattr_t PA_MASK = 0xfff0000000000fffULL;
-
 constexpr psattr_t PA_DATA = PA_P | PA_RW | PA_NXE;
 constexpr psattr_t PA_KERNEL_DATA = PA_DATA | PA_G;
 constexpr psattr_t PA_USER_DATA = PA_DATA | PA_G | PA_US;
@@ -99,11 +85,8 @@ class PSE {
     constexpr PSE parse() const noexcept { return PSE{*m_ptrbase & ~PA_MASK}; }
 };
 
-constexpr pfn_t EMPTY_PAGE_PFN = 0x7f000;
-
-constexpr uintptr_t KERNEL_PAGE_TABLE_ADDR = 0x100000;
-constexpr physaddr<void> KERNEL_PAGE_TABLE_PHYS_ADDR{KERNEL_PAGE_TABLE_ADDR};
-constexpr PSE KERNEL_PAGE_TABLE{0x100000};
+constexpr physaddr<void> KERNEL_PAGE_TABLE_PHYS_ADDR{KERNEL_PML4};
+constexpr PSE KERNEL_PAGE_TABLE{KERNEL_PML4};
 
 constexpr unsigned long PAGE_PRESENT = 0x00010000;
 constexpr unsigned long PAGE_BUDDY = 0x00020000;

+ 27 - 0
include/kernel/mem/paging_asm.h

@@ -0,0 +1,27 @@
+/*
+ * Physical memory layout and paging structure attribute bits.
+ *
+ * Included both by C++ (kernel/mem/paging.hpp) and by boot.s, so the values
+ * are plain integer literals and only C-style comments are used.
+ */
+#ifndef KERNEL_MEM_PAGING_ASM_H
+#define KERNEL_MEM_PAGING_ASM_H
+
+/* physical addresses, see doc/mem_layout.txt */
+#define KERNEL_IMAGE_PADDR         0x400000
+#define KERNEL_STAGE1_PADDR        0x001000
+#define KERNEL_PML4                0x002000
+#define KERNEL_PDPT_PHYS_MAPPING   0x003000
+#define KERNEL_PDPT_KERNEL_SPACE   0x004000
+#define KERNEL_PD_KIMAGE           0x005000
+#define KERNEL_PT_KIMAGE           0x006000
+#define KERNEL_PD_STRUCT_PAGE_ARR  0x007000
+/* NOTE: despite the name, kinit.cpp uses this as a physical address */
+#define EMPTY_PAGE_PFN             0x008000
+
+/* first kernel bss page (2MB) */
+#define KERNEL_BSS_HUGE_PAGE       0x200000
+
+/* paging structure entry attribute bits */
+#define PA_P    0x0000000000000001
+#define PA_RW   0x0000000000000002
+#define PA_US   0x0000000000000004
+#define PA_PWT  0x0000000000000008
+#define PA_PCD  0x0000000000000010
+#define PA_A    0x0000000000000020
+#define PA_D    0x0000000000000040
+#define PA_PS   0x0000000000000080
+#define PA_G    0x0000000000000100
+#define PA_COW  0x0000000000000200 /* copy on write */
+#define PA_MMAP 0x0000000000000400 /* memory mapped */
+#define PA_ANON 0x0000000000000800 /* anonymous map */
+#define PA_NXE  0x8000000000000000
+/* all attribute bits; PSE::parse() masks these off */
+#define PA_MASK 0xfff0000000000fff
+
+#endif /* KERNEL_MEM_PAGING_ASM_H */

+ 209 - 72
src/boot.s

@@ -1,99 +1,238 @@
 .section .stage1
+
+#include <kernel/mem/paging_asm.h>
+
+.code16
+
+.align 4
+.Lbios_idt_desc:
+    .word 0x03ff     # size
+    .long 0x00000000 # base
+
+.align 4
+.Lnull_idt_desc:
+    .word 0 # size
+    .long 0 # base
+
+.Lhalt16:
+    hlt
+    jmp .Lhalt16
+
+# go_32bit: switch from 16-bit code to 32-bit protected mode; clobbers %eax
+# call with a 2-byte return address: it is popped and zero extended to 4 bytes for the 32-bit ret
+go_32bit:
+    cli
+    lidt .Lnull_idt_desc # IDT with size 0, base 0
+
+    # set PE bit (bit 0 of %cr0)
+    mov %cr0, %eax
+    or $1, %eax
+    mov %eax, %cr0
+
+    ljmp $0x08, $.Lgo_32bit0 # 0x08: code selector of the early GDT in mbr.S
+
+.Lgo_16bit0:
+    mov $0x20, %ax # 0x20: 16 bit data selector of the early GDT in mbr.S
+    mov %ax, %ds
+    mov %ax, %ss
+
+    lidt .Lbios_idt_desc # IDT with size 0x3ff, base 0
+
+    mov %cr0, %eax
+    and $0xfffffffe, %eax # clear PE bit
+    mov %eax, %cr0
+
+    ljmp $0x00, $.Lgo_16bit1
+.Lgo_16bit1:
+    xor %ax, %ax
+    mov %ax, %ds
+    mov %ax, %ss
+    mov %ax, %es
+
+    sti
+
+    pop %eax # 4-byte return address pushed by the 32-bit caller
+    push %ax # keep only its low 2 bytes for the 16-bit ret
+    ret
+
+.code32
+# go_16bit: switch from 32-bit protected mode back to 16-bit code; clobbers %eax
+# call with a 4-byte return address: it is popped and its low 2 bytes are pushed back for the 16-bit ret
+go_16bit:
+    cli
+    ljmp $0x18, $.Lgo_16bit0 # 0x18: 16 bit code selector of the early GDT in mbr.S
+
+.Lgo_32bit0:
+    mov $0x10, %ax # 0x10: data selector of the early GDT in mbr.S
+    mov %ax, %ds
+    mov %ax, %es
+    mov %ax, %ss
+
+    pop %ax # 2-byte return address pushed by the 16-bit caller
+    movzw %ax, %eax
+    push %eax
+    ret
+
+# read_disk: read 64 sectors (32K) from BIOS drive 0x80 with int 0x13,
+# function 0x42, and copy them to the destination
+#
+# the read packet is built on the stack; the BIOS reads into the buffer at
+# 0x0200:0x0000 (physical 0x2000) and the data is copied from there to the
+# destination after switching back to 32-bit mode
+#
+# halts if the BIOS reports an error (carry set)
+#
+# %edi: lba start
+# %esi: destination
+# clobbers %eax, %ecx, %dl, %esi, %edi
+.code32
+read_disk:
+    push %ebp
+    mov %esp, %ebp
+
+    lea -24(%esp), %esp
+
+    mov $0x00400010, %eax # packet size 0x10, reserved 0, sector count 64
+    mov %eax, (%esp)
+
+    mov $0x02000000, %eax # buffer offset 0x0000, segment 0x0200
+    mov %eax, 4(%esp)
+
+    mov %edi, 8(%esp)  # lba low 4 bytes
+
+    xor %eax, %eax
+    mov %eax, 12(%esp) # lba high 4 bytes
+
+    mov %esi, %edi
+    mov %esp, %esi # packet address
+
+    call go_16bit
+.code16
+    mov $0x42, %ah
+    mov $0x80, %dl
+    int $0x13
+    jc .Lhalt16
+
+    call go_32bit
+.code32
+    # move data to destination
+    # clear the direction flag first: the copy must go upwards no matter
+    # what state the BIOS call left the flag in
+    cld
+    mov $0x2000, %esi
+    mov $8192, %ecx # 8192 dwords = 32K
+    rep movsl
+
+    mov %ebp, %esp
+    pop %ebp
+    ret
+
 .globl start_32bit
 start_32bit:
     mov $0x10, %ax
     mov %ax, %ds
     mov %ax, %es
-    mov %ax, %fs
-    mov %ax, %gs
     mov %ax, %ss
 
+    # read kimage into memory
+	lea -16(%esp), %esp
+    mov $KIMAGE_32K_COUNT, %ecx
+    mov $KERNEL_IMAGE_PADDR, 4(%esp) # destination address
+	mov $9, (%esp) # LBA
+
+.Lread_kimage:
+	mov (%esp), %edi
+	mov 4(%esp), %esi
+
+	mov %ecx, %ebx
+    call read_disk
+	mov %ebx, %ecx
+
+    add $0x8000, 4(%esp)
+	add $64, (%esp)
+
+    loop .Lread_kimage
+
+	lea 16(%esp), %esp
+
     cld
     xor %eax, %eax
 
     # clear paging structures
-    mov $0x100000, %edi
-    mov %edi, %ecx
+    mov $0x2000, %edi
+    mov $0x6000, %ecx
     shr $2, %ecx # %ecx /= 4
     rep stosl
 
     # set P, RW, G
-    mov $0x00000103, %ebx
-	xor %edx, %edx
-    mov $0x00101000, %esi
+    mov $(PA_P | PA_RW | PA_G), %ebx
+    xor %edx, %edx
+    mov $KERNEL_PDPT_PHYS_MAPPING, %esi
 
     # PML4E 0x000
     # we need the first 1GB identically mapped
     # so that we won't trigger a triple fault after
     # enabling paging
-	lea -0x1000(%esi), %edi # %edi = 0x100000
+    mov $KERNEL_PML4, %edi
     call fill_pxe
 
     # PML4E 0xff0
-	mov $0x80000000, %edx
-	lea 0xff0(%edi), %edi
-	call fill_pxe
-	xor %edx, %edx
+    mov $(PA_NXE >> 32), %edx
+    lea 0xff0(%edi), %edi
+    call fill_pxe
+    xor %edx, %edx
 
     # setup PDPT for physical memory mapping
-    mov %esi, %edi
+    mov $KERNEL_PDPT_PHYS_MAPPING, %edi
 
     # set PS
-    or $0x00000080, %ebx
+    or $PA_PS, %ebx
     mov $256, %ecx
     xor %esi, %esi
-_fill_loop1:
+.Lfill1:
     call fill_pxe
     lea 8(%edi), %edi
     add $0x40000000, %esi # 1GB
     adc $0, %edx
-    loop _fill_loop1
+    loop .Lfill1
 
-	mov $0x80000000, %edx
+    mov $(PA_NXE >> 32), %edx
 
     # set PCD, PWT
-    or $0x00000018, %ebx
+    or $(PA_PCD | PA_PWT), %ebx
     mov $256, %ecx
     xor %esi, %esi
-_fill_loop2:
+.Lfill2:
     call fill_pxe
     lea 8(%edi), %edi
     add $0x40000000, %esi # 1GB
     adc $0, %edx
-    loop _fill_loop2
+    loop .Lfill2
 
-	xor %edx, %edx
+    xor %edx, %edx
 
     # PML4E 0xff8
-    mov %edi, %esi # 0x102000
-    mov $0x100ff8, %edi
+    mov $KERNEL_PDPT_KERNEL_SPACE, %esi
+    mov $KERNEL_PML4, %edi
+    lea 0xff8(%edi), %edi
     # clear PCD, PWT, PS
-    and $(~0x00000098), %ebx
+    and $(~(PA_PCD | PA_PWT | PA_PS)), %ebx
     call fill_pxe
 
     # PDPTE 0xff8
-    lea 0xff8(%esi), %edi  # 0x102ff8
-    lea 0x1000(%esi), %esi # 0x103000
+    mov $KERNEL_PDPT_KERNEL_SPACE, %edi
+    lea 0xff8(%edi), %edi
+    mov $KERNEL_PD_KIMAGE, %esi
     call fill_pxe
 
     # PDE 0xff0
-    lea 0xff0(%esi), %edi  # 0x103ff0
-    lea 0x1000(%esi), %esi # 0x104000
+    mov $KERNEL_PD_KIMAGE, %edi
+    lea 0xff0(%edi), %edi
+    mov $KERNEL_PT_KIMAGE, %esi # 0x6000
     call fill_pxe
 
     # fill PT (kernel image)
-    mov %esi, %edi # 0x104000
-    mov $0x2000, %esi
+    mov $KERNEL_PT_KIMAGE, %edi
+    mov $KERNEL_IMAGE_PADDR, %esi
 
-.extern KERNEL_PAGES
     mov $KIMAGE_PAGES, %ecx
 
-_fill_loop3:
+.Lfill3:
     call fill_pxe
     lea 8(%edi), %edi
-	lea 0x1000(%esi), %esi
-    loop _fill_loop3
+    lea 0x1000(%esi), %esi
+    loop .Lfill3
 
     # set msr
     mov $0xc0000080, %ecx
@@ -107,9 +246,7 @@ _fill_loop3:
     mov %eax, %cr4
 
     # load new page table
-	xor %eax, %eax
-	inc %eax
-	shl $20, %eax # %eax = 0x100000
+    mov $KERNEL_PML4, %eax
     mov %eax, %cr3
 
     mov %cr0, %eax
@@ -118,28 +255,28 @@ _fill_loop3:
     mov %eax, %cr0
 
     # create gdt
-	xor %eax, %eax # at 0x0000
-	mov %eax, 0x00(%eax)
-	mov %eax, 0x04(%eax) # null descriptor
-	mov %eax, 0x08(%eax) # code segment lower
-	mov %eax, 0x10(%eax) # data segment lower
-	mov $0x00209a00, %ecx
-	mov %ecx, 0x0c(%eax) # code segment higher
-	mov $0x00009200, %ecx
-	mov %ecx, 0x14(%eax) # data segment higher
+    xor %eax, %eax # at 0x0000
+    mov %eax, 0x00(%eax)
+    mov %eax, 0x04(%eax) # null descriptor
+    mov %eax, 0x08(%eax) # code segment lower
+    mov %eax, 0x10(%eax) # data segment lower
+    mov $0x00209a00, %ecx
+    mov %ecx, 0x0c(%eax) # code segment higher
+    mov $0x00009200, %ecx
+    mov %ecx, 0x14(%eax) # data segment higher
 
     # gdt descriptor
-	push %eax
-	push %eax
+    push %eax
+    push %eax
 
     # pad with a word
-	mov $0x00170000, %eax
-	push %eax
+    mov $0x00170000, %eax
+    push %eax
 
-	lgdt 2(%esp)
-	add $12, %esp
+    lgdt 2(%esp)
+    add $12, %esp
 
-    ljmp $0x08, $_64bit_entry
+    ljmp $0x08, $.L64bit_entry
 
 # %ebx: attribute low
 # %edx: attribute high
@@ -153,33 +290,33 @@ fill_pxe:
     ret
 
 .code64
-_64bit_entry:
-	jmp start_64bit
+.L64bit_entry:
+    jmp start_64bit
 
 .section .text.kinit
 start_64bit:
     # set stack pointer and clear stack bottom
-	movzw %sp, %rdi
-	xor %rsp, %rsp
-	inc %rsp
-	neg %rsp
-	shr $40, %rsp
-	shl $40, %rsp
+    mov %rsp, %rdi
+    xor %rsp, %rsp
+    inc %rsp
+    neg %rsp
+    shr $40, %rsp
+    shl $40, %rsp
 
-	add %rdi, %rsp
-	mov %rsp, %rdi
+    add %rdi, %rsp
+    mov %rsp, %rdi
 
     # make stack frame
-	lea -16(%rsp), %rsp
-	mov %rsp, %rbp
+    lea -16(%rsp), %rsp
+    mov %rsp, %rbp
 
-	xor %rax, %rax
-	mov %rax, (%rsp)
-	mov %rax, 8(%rsp)
+    xor %rax, %rax
+    mov %rax, (%rsp)
+    mov %rax, 8(%rsp)
 
     call kernel_init
 
-_64bit_hlt:
-	cli
-	hlt
-	jmp _64bit_hlt
+.L64bit_hlt:
+    cli
+    hlt
+    jmp .L64bit_hlt

+ 13 - 11
src/kernel.ld

@@ -16,6 +16,9 @@ SECTIONS
     {
         KEEP(*(.mbr));
 
+        . = 446;
+        BYTE(0x00);
+
         . = 510;
         BYTE(0x55);
         BYTE(0xaa);
@@ -23,11 +26,8 @@ SECTIONS
 
     .stage1 : AT(LOADADDR(.mbr) + SIZEOF(.mbr))
     {
-        __stage1_start = .;
         *(.stage1)
-
         . = ALIGN(0x1000);
-        __stage1_end = .;
     } > STAGE1
 
     .kinit :
@@ -50,6 +50,9 @@ SECTIONS
         KINIT_PAGES = .;
         QUAD((KINIT_END - KINIT_START) / 0x1000);
 
+        KIMAGE_PAGES_VALUE = .;
+        QUAD((KIMAGE_END - KIMAGE_START) / 0x1000);
+
         . = ALIGN(16);
         start_ctors = .;
         KEEP(*(.init_array));
@@ -119,15 +122,13 @@ SECTIONS
         *(.got)
         *(.got.plt)
 
-        . = ALIGN(0x1000);
+        . = ALIGN(0x1000) - 4;
+        LONG(KERNEL_MAGIC);
+
         DATA_END = .;
         KIMAGE_END = .;
     } > KIMAGE
 
-    .sentry :
-        AT(0x78000 - 0x4)
-    { LONG(0x01145140); } > KIMAGE
-
     .bss :
     {
         BSS_START = .;
@@ -140,14 +141,15 @@ SECTIONS
 
     KIMAGE_PAGES = (KIMAGE_END - KIMAGE_START) / 0x1000;
     BSS_PAGES = (BSS_END - BSS_START) / 0x1000;
+    KERNEL_MAGIC = 0x01145140;
+
+    KIMAGE_32K_COUNT = ((KIMAGE_END - KIMAGE_START) + 32 * 1024 - 1) / (32 * 1024);
 
     .eh_frame :
-        AT(LOADADDR(.sentry) + SIZEOF(.sentry))
+        AT(LOADADDR(.data) + SIZEOF(.data))
     {
-        __eh_frame_start = .;
         KEEP(*(.eh_frame*))
         . = ALIGN(0x1000);
-        __eh_frame_end = .;
     } > KIMAGE
 
     /* Stabs debugging sections.  */

+ 40 - 14
src/kernel/mem/paging.cc

@@ -128,6 +128,45 @@ static inline page* _alloc_zone(unsigned order) {
     return nullptr;
 }
 
+// Find the split point of the page range [l, r): the high-order bits shared
+// by l and r, followed by the highest bit in which they differ, with every
+// lower bit cleared. Returns l when l == r.
+constexpr uintptr_t _find_mid(uintptr_t l, uintptr_t r) {
+    if (l == r)
+        return l;
+    // shift a uintptr_t, not an int: `1 << n` overflows once n reaches 31
+    const uintptr_t bit = uintptr_t{1} << _msb(l ^ r);
+
+    return (l & r & ~(bit - 1)) | bit;
+}
+
+// Create zones covering the page range [l, r).
+//
+// The range is split at _find_mid(l, r). When the split point is r itself,
+// the range is created directly as blocks of increasing order, one per set
+// bit of (r - l), starting at l. Otherwise both halves are handled
+// recursively.
+static void _recur_create_zone(uintptr_t l, uintptr_t r) {
+    const auto mid = _find_mid(l, r);
+    assert(l <= mid);
+
+    // empty zone
+    if (l == mid) {
+        assert(l == r);
+        return;
+    }
+
+    // create [l, r) directly
+    if (r == mid) {
+        auto diff = r - l;
+        // walk the set bits of diff from the lowest one upwards; the block
+        // size is a uintptr_t so that orders of 31 and above do not overflow
+        // an int-sized shift
+        for (unsigned order = 0; diff != 0; ++order) {
+            const uintptr_t block = uintptr_t{1} << order;
+            if (!(diff & block))
+                continue;
+
+            _create_zone(l << 12, order);
+
+            l += block;
+            diff &= ~block;
+        }
+
+        return;
+    }
+
+    // split into halves
+    _recur_create_zone(l, mid);
+    _recur_create_zone(mid, r);
+}
+
 void kernel::mem::paging::create_zone(uintptr_t start, uintptr_t end) {
     start += (4096 - 1);
     start >>= 12;
@@ -138,20 +177,7 @@ void kernel::mem::paging::create_zone(uintptr_t start, uintptr_t end) {
 
     lock_guard_irq lock{zone_lock};
 
-    unsigned long low = start;
-    for (unsigned i = 0; i < _msb(end); ++i, low >>= 1) {
-        if (!(low & 1))
-            continue;
-        _create_zone(low << (12 + i), i);
-        low++;
-    }
-
-    low = 1 << _msb(end);
-    while (low < end) {
-        unsigned order = _msb(end - low);
-        _create_zone(low << 12, order);
-        low |= (1 << order);
-    }
+    _recur_create_zone(start, end);
 }
 
 void kernel::mem::paging::mark_present(uintptr_t start, uintptr_t end) {

+ 3 - 3
src/kernel/process.cpp

@@ -191,12 +191,12 @@ static void release_kinit() {
     extern uintptr_t volatile KINIT_START_ADDR, KINIT_END_ADDR, KINIT_PAGES;
 
     std::size_t pages = KINIT_PAGES;
-    auto range = vaddr_range{KERNEL_PAGE_TABLE_ADDR, KINIT_START_ADDR,
-                             KINIT_END_ADDR, true};
+    auto range =
+        vaddr_range{KERNEL_PML4, KINIT_START_ADDR, KINIT_END_ADDR, true};
     for (auto pte : range)
         pte.clear();
 
-    create_zone(0x2000, 0x2000 + 0x1000 * pages);
+    create_zone(KERNEL_IMAGE_PADDR, KERNEL_IMAGE_PADDR + 0x1000 * pages);
 }
 
 extern "C" void (*const late_init_start[])();

+ 2 - 2
src/kernel/vfs/ffi.rs

@@ -293,11 +293,11 @@ pub extern "C" fn r_get_inode_mode(mut inode: *const Inode) -> mode_t {
 }
 
 #[no_mangle]
-pub extern "C" fn r_get_inode_size(mut inode: *const Inode) -> mode_t {
+pub extern "C" fn r_get_inode_size(mut inode: *const Inode) -> u64 {
     let inode = inode_from_raw(&mut inode);
     let idata = inode.idata.lock();
 
-    idata.size as _
+    idata.size
 }
 
 extern "C" {

+ 19 - 11
src/kinit.cpp

@@ -96,15 +96,17 @@ static inline void setup_early_kernel_page_table() {
     auto pd = pdpt[std::get<2>(idx)].parse();
 
     // kernel bss, size 2M
-    pd[std::get<3>(idx)].set(PA_KERNEL_DATA_HUGE, 0x200000);
+    pd[std::get<3>(idx)].set(PA_KERNEL_DATA_HUGE, KERNEL_BSS_HUGE_PAGE);
 
     // clear kernel bss
     memset((void*)BSS_ADDR, 0x00, BSS_LENGTH);
 
     // clear empty page
-    memset(mem::physaddr<void>{EMPTY_PAGE_PFN}, 0x00, 0x1000);
+    memset(mem::physaddr<void>{(uintptr_t)EMPTY_PAGE_PFN}, 0x00, 0x1000);
 }
 
+extern "C" uintptr_t KIMAGE_PAGES_VALUE;
+
 SECTION(".text.kinit")
 static inline void setup_buddy(uintptr_t addr_max) {
     using namespace kernel::mem;
@@ -115,16 +117,20 @@ static inline void setup_buddy(uintptr_t addr_max) {
     addr_max >>= 12;
     int count = (addr_max * sizeof(page) + 0x200000 - 1) / 0x200000;
 
-    pfn_t start_pfn = 0x400000;
+    pfn_t real_start_pfn = KERNEL_IMAGE_PADDR + KIMAGE_PAGES_VALUE * 0x1000;
+    pfn_t aligned_start_pfn = real_start_pfn + 0x200000 - 1;
+    aligned_start_pfn &= ~0x1fffff;
+
+    pfn_t saved_start_pfn = aligned_start_pfn;
 
-    memset(physaddr<void>{0x105000}, 0x00, 4096);
+    memset(physaddr<void>{KERNEL_PD_STRUCT_PAGE_ARR}, 0x00, 4096);
 
     auto pdpte = KERNEL_PAGE_TABLE[std::get<1>(idx)].parse()[std::get<2>(idx)];
-    pdpte.set(PA_KERNEL_PAGE_TABLE, 0x105000);
+    pdpte.set(PA_KERNEL_PAGE_TABLE, KERNEL_PD_STRUCT_PAGE_ARR);
 
     auto pd = pdpte.parse();
-    for (int i = 0; i < count; ++i, start_pfn += 0x200000)
-        pd[std::get<3>(idx) + i].set(PA_KERNEL_DATA_HUGE, start_pfn);
+    for (int i = 0; i < count; ++i, aligned_start_pfn += 0x200000)
+        pd[std::get<3>(idx) + i].set(PA_KERNEL_DATA_HUGE, aligned_start_pfn);
 
     PAGE_ARRAY = (page*)0xffffff8040000000ULL;
     memset(PAGE_ARRAY, 0x00, addr_max * sizeof(page));
@@ -138,11 +144,11 @@ static inline void setup_buddy(uintptr_t addr_max) {
 
         auto start = ent.base;
         auto end = start + ent.len;
-        if (end <= start_pfn)
+        if (end <= aligned_start_pfn)
             continue;
 
-        if (start < start_pfn)
-            start = start_pfn;
+        if (start < aligned_start_pfn)
+            start = aligned_start_pfn;
 
         if (start > end)
             continue;
@@ -153,7 +159,9 @@ static inline void setup_buddy(uintptr_t addr_max) {
     // free .stage1
     create_zone(0x1000, 0x2000);
     // unused space
-    create_zone(0x106000, 0x200000);
+    create_zone(0x9000, 0x80000);
+    create_zone(0x100000, 0x200000);
+    create_zone(real_start_pfn, saved_start_pfn);
 }
 
 SECTION(".text.kinit")

+ 38 - 88
src/mbr.S

@@ -8,80 +8,37 @@ move_mbr:
     mov %ax, %ss
 
     # build a temporary stack
-    mov $0x0e00, %esp
-    mov %esp, %ebp
+    xor %esp, %esp
+    mov $0x0e00, %sp
 
-    mov $128, %cx # 512 / 4
+    mov $128, %cx # 512 bytes
     mov $0x7c00, %si
     mov $0x0e00, %di
     rep movsl
 
-    ljmp $0x00, $mbr_start
+    lgdt .Learly_gdt_descriptor
 
-# %eax: lba lower 4bytes
-# %edx: destination address
-read_disk:
-	push %eax
-	push %edx
-	push %ecx
-
-	mov %eax, read_data_lba
-	shr $4, %edx
-	mov %dx, read_data_segment
-
-    mov $read_data_pack, %si
+    mov $.Lread_data_packet, %si
     mov $0x42, %ah
     mov $0x80, %dl
     int $0x13
-    jc halt
-
-	pop %ecx
-	pop %edx
-	pop %eax
-	ret
-
-mbr_start:
-    # clear screen
-    mov $0x00, %ah
-    mov $0x03, %al
-    int $0x10
-
-    # read kernel image: 32K * 15 = 480K
-	xor %eax, %eax
-	inc %eax # %eax = 1
-	mov %eax, %edx
-	shl $12, %edx # %edx = 0x1000
-
-	mov $15, %ecx
-_loop_read_kernel:
-	call read_disk
-	add $64, %eax # %eax += 64
-
-	shr $12, %edx
-	add $8, %edx
-	shl $12, %edx # %edx += 32K
-
-	loop _loop_read_kernel
+    jc .Lhalt
 
     # get memory size info and store it
-    xor %ecx, %ecx
-    xor %edx, %edx
-	xor %eax, %eax
     mov $0xe801, %ax
-
     int $0x15
-    jc halt
+    jc .Lhalt
 
     cmp $0x86, %ah # unsupported function
-    je halt
+    je .Lhalt
     cmp $0x80, %ah # invalid command
-    je halt
+    je .Lhalt
 
-    jcxz _get_memory_size_use_ax
+    jcxz .Lax
     mov %cx, %ax
     mov %dx, %bx
 
-_get_memory_size_use_ax:
+.Lax:
     sub $1024, %esp
     movzw %ax, %eax
     mov %eax, 8(%esp)  # 1k blocks
@@ -89,16 +46,17 @@ _get_memory_size_use_ax:
     mov %ebx, 12(%esp) # 64k blocks
 
     # save the destination address to es:di
-    lea 16(%esp), %di # buffer is 1024 - 16 bytes
+    mov %sp, %di
+    add $16, %di # buffer is 1024 - 16 bytes
+
+    # set default entry size
+    movl $20, 4(%esp)
 
     # clear %ebx, len
     xor %ebx, %ebx
     mov %ebx, (%esp)
 
-    # set default entry size
-    movl $20, 4(%esp)
-
-_e820_mem_map_load_loop:
+.Le820_read_mem_map:
     # set the magic number to edx
     mov $0x534D4150, %edx
 
@@ -113,60 +71,52 @@ _e820_mem_map_load_loop:
     incl (%esp)
     add $24, %edi
 
-    jc _e820_mem_map_load_fin
+    jc .Lsave_mem_fin
     cmp $0, %ebx
-    jz _e820_mem_map_load_fin
+    jz .Lsave_mem_fin
 
     cmp $24, %ecx
     cmovnz 4(%esp), %ecx
     mov %ecx, 4(%esp)
 
-    jmp _e820_mem_map_load_loop
+    jmp .Le820_read_mem_map
 
-_e820_mem_map_load_fin:
-    # load GDT and IDT
+.Lsave_mem_fin:
     cli
-    lidt null_idt_descriptor
-    lgdt _32bit_gdt_descriptor
+    lidt .Lnull_idt_descriptor
 
-    # enable protection enable (PE) bit
     mov %cr0, %eax
     or $1, %eax
     mov %eax, %cr0
 
     ljmp $0x08, $start_32bit
 
-halt:
+.Lhalt:
     hlt
-    jmp halt
+    jmp .Lhalt
 
 .align 16
-read_data_pack:
-    .byte 0x10, 0
-read_data_count:
-    .word 64     # sector count (read 32k)
-read_data_offset:
-    .word 0x0000 # offset address
-read_data_segment:
-    .word 0x0100 # segment address
-read_data_lba:
-    .long 1      # lower 4 bytes of the LBA to read
-    .long 0      # higher 2 bytes of the LBA to read
+.Learly_gdt:
+    .8byte 0x0                # null selector
+    .8byte 0x00cf9a000000ffff # code selector
+    .8byte 0x00cf92000000ffff # data selector
+    .8byte 0x000f9a000000ffff # 16 bit code selector
+    .8byte 0x000f92000000ffff # 16 bit data selector
 
 # null IDT descriptor
 # so that exceptions will cause the system to reset
 .align 4
-null_idt_descriptor:
+.Lnull_idt_descriptor:
     .word 0 # size
     .long 0 # base
 
 .align 4
-_32bit_gdt_descriptor:
-    .word (3 * 8) - 1 # size
-    .long _32bit_gdt  # address
+.Learly_gdt_descriptor:
+    .word 0x27 # size
+    .long .Learly_gdt  # address
 
 .align 16
-_32bit_gdt:
-    .8byte 0x0                # null selector
-    .8byte 0x00cf9a000000ffff # code selector
-    .8byte 0x00cf92000000ffff # data selector
+.Lread_data_packet:
+    .long  0x00080010 # .stage1 takes up 4K, or 8 sectors
+    .long  0x00001000 # read to 0000:1000
+    .8byte 1          # read from LBA 1