
prepare the memory layout for SMP CPU init

greatbridf 4 months ago
parent
commit
bfcf57b9a0
8 changed files with 475 additions and 28 deletions
  1. CMakeLists.txt (+1 -0)
  2. include/kernel/mem/paging_asm.h (+1 -0)
  3. src/boot.s (+55 -0)
  4. src/kernel.ld (+35 -13)
  5. src/kernel/mem/mm_list.cc (+323 -0)
  6. src/kernel/mem/page_table.rs (+4 -5)
  7. src/kinit.cpp (+0 -5)
  8. src/lib.rs (+56 -5)

+ 1 - 0
CMakeLists.txt

@@ -83,6 +83,7 @@ target_include_directories(kernel.out PRIVATE ${PROJECT_SOURCE_DIR}/include)
 target_link_options(kernel.out PRIVATE
     -T "${CMAKE_SOURCE_DIR}/src/kernel.ld"
     -L "${CMAKE_BINARY_DIR}/x86_64-unknown-none/${CARGO_BUILD_TYPE}"
+    --no-check-sections
     )
 set_target_properties(kernel.out PROPERTIES LINK_DEPENDS "${CMAKE_SOURCE_DIR}/src/kernel.ld")
 set_source_files_properties(src/mbr.S PROPERTIES OBJECT_DEPENDS

+ 1 - 0
include/kernel/mem/paging_asm.h

@@ -9,6 +9,7 @@
 
 #define KERNEL_BSS_HUGE_PAGE       0x200000
 
+
 #define PA_P    0x0000000000000001
 #define PA_RW   0x0000000000000002
 #define PA_US   0x0000000000000004

+ 55 - 0
src/boot.s

@@ -321,3 +321,58 @@ start_64bit:
     cli
     hlt
     jmp .L64bit_hlt
+
+.section .stage1.smp
+.code16
+
+.globl ap_bootstrap
+.type ap_bootstrap, @function
+ap_bootstrap:
+    ljmp $0x0, $.Lap1
+
+.Lap1:
+    # we use a shared gdt for now
+    lgdt shared_gdt_desc
+
+    # set msr
+    mov $0xc0000080, %ecx
+    rdmsr
+    or $0x901, %eax # set LME, NXE, SCE
+    wrmsr
+
+    # set cr4
+    mov %cr4, %eax
+    or $0xa0, %eax # set PAE, PGE
+    mov %eax, %cr4
+
+    # load new page table
+    mov $KERNEL_PML4, %eax
+    mov %eax, %cr3
+
+    mov %cr0, %eax
+    # set PE, WP, PG
+    or $0x80010001, %eax
+    mov %eax, %cr0
+
+    ljmp $0x08, $.Lap_bootstrap_end
+
+.align 16
+shared_gdt_desc:
+    .8byte 0x0000000000005f
+
+.code64
+.Lap_bootstrap_end:
+    mov $0x10, %ax
+    mov %ax, %ds
+    mov %ax, %es
+    mov %ax, %ss
+
+    lock incl boot_semaphore
+    jmp .
+
+.section .bss
+.align 4
+.globl boot_semaphore
+.type boot_semaphore, @object
+boot_semaphore:
+    .long 0
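
Note: ap_bootstrap must sit at the very start of STAGE1 (physical 0x1000),
because a SIPI startup vector is just a 4 KiB page number below 1 MiB. This is
presumably also why src/kinit.cpp (below) stops freeing the 0x1000-0x2000
range and keeps the low identity mapping: the trampoline must survive, and
stay reachable, until the APs are up. A minimal sketch of the vector
arithmetic (sipi_vector is a hypothetical helper, not part of this commit):

    #include <assert.h>
    #include <stdint.h>

    // Maps a page-aligned trampoline address below 1 MiB to its SIPI vector.
    // ap_bootstrap at 0x1000 yields vector 0x01, the value encoded in the
    // 0xc4601 ICR write in src/lib.rs below.
    static inline uint8_t sipi_vector(uintptr_t trampoline_phys) {
        assert((trampoline_phys & 0xfff) == 0); // must be page-aligned
        assert(trampoline_phys < 0x100000);     // must lie below 1 MiB
        return static_cast<uint8_t>(trampoline_phys >> 12);
    }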

+ 35 - 13
src/kernel.ld

@@ -2,12 +2,13 @@ OUTPUT_FORMAT(elf64-x86-64)
 
 MEMORY
 {
-    MBR    (wx) : org = 0x0e00, l = 512
-    STAGE1 (wx) : org = 0x1000, l = 4K
-    PHYMEM (w)  : org = 0xffffff0000000000, len = 512 * 1024M
-    PARRAY (w)  : org = 0xffffff8000000000, len = 128 * 1024M
-    KBSS   (w)  : org = 0xffffffffc0200000, len = 2M
-    KIMAGE (wx) : org = 0xffffffffffc00000, len = 2M
+    MBR           (wx) : org = 0x0e00, l = 512
+    STAGE1        (wx) : org = 0x1000, l = 4K
+    PHYMEM        (w)  : org = 0xffffff0000000000, len = 512 * 1024M
+    PARRAY        (w)  : org = 0xffffff8000000000, len = 128 * 1024M
+    KBSS          (w)  : org = 0xffffffffc0200000, len = 2M
+    KIMAGE        (wx) : org = 0xffffffffffc00000, len = 2M
+    KPERCPU       (w)  : org = 0x0000000000000000, len = 128K
 }
 
 SECTIONS
@@ -26,14 +27,17 @@ SECTIONS
 
     .stage1 : AT(LOADADDR(.mbr) + SIZEOF(.mbr))
     {
+        KEEP(*(.stage1.smp));
+
+        . = ALIGN(16);
         *(.stage1)
+
         . = ALIGN(0x1000);
     } > STAGE1
 
     .text :
         AT(LOADADDR(.stage1) + SIZEOF(.stage1))
     {
-        KIMAGE_START = .;
         TEXT_START = .;
         *(.text)
         *(.text*)
@@ -42,6 +46,8 @@ SECTIONS
         TEXT_END = .;
     } > KIMAGE
 
+    TEXT_PAGES = (TEXT_END - TEXT_START) / 0x1000;
+
     .rodata :
         AT(LOADADDR(.text) + SIZEOF(.text))
     {
@@ -76,8 +82,9 @@ SECTIONS
         RODATA_END = .;
     } > KIMAGE
 
-    .data :
-        AT(LOADADDR(.rodata) + SIZEOF(.rodata))
+    RODATA_PAGES = (RODATA_END - RODATA_START) / 0x1000;
+
+    .data : AT(LOADADDR(.rodata) + SIZEOF(.rodata))
     {
         DATA_START = .;
         *(.data)
@@ -89,11 +96,26 @@ SECTIONS
         . = . + 4;
         . = ALIGN(0x1000) - 4;
         LONG(KERNEL_MAGIC);
-
         DATA_END = .;
-        KIMAGE_END = .;
     } > KIMAGE
 
+    DATA_PAGES = (DATA_END - DATA_START) / 0x1000;
+
+    _PERCPU_DATA_START = .;
+    .percpu 0 (NOLOAD) : AT(LOADADDR(.data) + SIZEOF(.data))
+    {
+        PERCPU_START = .;
+        . += 0x10; /* Reserved for x86 percpu pointer */
+
+        *(.percpu .percpu*)
+
+        . = ALIGN(0x1000);
+        PERCPU_END = .;
+    } > KPERCPU
+    _PERCPU_LENGTH = PERCPU_END - PERCPU_START;
+
+    PERCPU_PAGES = _PERCPU_LENGTH / 0x1000;
+
     .bss :
     {
         BSS_START = .;
@@ -104,11 +126,11 @@ SECTIONS
         BSS_END = .;
     } > KBSS
 
-    KIMAGE_PAGES = (KIMAGE_END - KIMAGE_START) / 0x1000;
+    KIMAGE_PAGES = TEXT_PAGES + RODATA_PAGES + PERCPU_PAGES + DATA_PAGES;
     BSS_PAGES = (BSS_END - BSS_START) / 0x1000;
     KERNEL_MAGIC = 0x01145140;
 
-    KIMAGE_32K_COUNT = ((KIMAGE_END - KIMAGE_START) + 32 * 1024 - 1) / (32 * 1024);
+    KIMAGE_32K_COUNT = (KIMAGE_PAGES * 0x1000 + 32 * 1024 - 1) / (32 * 1024);
 
     .eh_frame :
         AT(LOADADDR(.data) + SIZEOF(.data))
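
The new KPERCPU region gives .percpu a link address of zero, so per-CPU
symbols resolve to offsets within a per-CPU block rather than to fixed
virtual addresses; its load address, however, overlaps .eh_frame's, which is
presumably what the new --no-check-sections linker flag in CMakeLists.txt is
there to silence. A minimal sketch of how such a section is typically used
(assuming GS.base is made to point at the current CPU's copy of the block;
the variable and helper below are hypothetical, not part of this commit):

    #include <stdint.h>

    // The link-time "address" of this symbol is its offset inside .percpu,
    // because the section's VMA starts at 0.
    [[gnu::section(".percpu"), gnu::used]] static uint64_t percpu_counter;

    // Reads this CPU's copy through the GS segment base.
    static inline uint64_t read_percpu_counter() {
        uint64_t value;
        asm volatile("mov %%gs:percpu_counter, %0" : "=r"(value));
        return value;
    }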

+ 323 - 0
src/kernel/mem/mm_list.cc

@@ -0,0 +1,323 @@
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include <kernel/mem/mm_list.hpp>
+#include <kernel/mem/paging.hpp>
+#include <kernel/mem/vm_area.hpp>
+
+using namespace kernel::mem;
+
+static inline void __invalidate_all_tlb() {
+    asm volatile(
+        "mov %%cr3, %%rax\n\t"
+        "mov %%rax, %%cr3\n\t"
+        :
+        :
+        : "rax", "memory");
+}
+
+static inline void __dealloc_page_table_all(paging::pfn_t pt, int depth, int from, int to) {
+    using namespace paging;
+
+    if (depth > 1) {
+        for (int i = from; i < to; ++i) {
+            auto pse = PSE{pt}[i];
+            if (!(pse.attributes() & PA_P))
+                continue;
+
+            int pfn = pse.pfn();
+            __dealloc_page_table_all(pfn, depth - 1, 0, 512);
+        }
+    }
+
+    free_page(pt);
+}
+
+static inline void __dealloc_page_table(paging::pfn_t pt) {
+    using namespace paging;
+    auto start_idx = idx_p4(0);
+    auto end_idx = idx_p4(KERNEL_SPACE_START);
+
+    __dealloc_page_table_all(pt, 4, start_idx, end_idx);
+}
+
+mm_list::mm_list() : m_pt{paging::alloc_page_table()}, m_brk{m_areas.end()} {
+    // copy only kernel space
+    memcpy(physaddr<void>{m_pt + 0x800}, physaddr<void>{KERNEL_PML4 + 0x800}, 0x800);
+}
+
+mm_list::mm_list(const mm_list& other) : mm_list{} {
+    m_areas = other.m_areas;
+
+    using namespace paging;
+    for (auto iter = m_areas.begin(); iter != m_areas.end(); ++iter) {
+        auto& area = *iter;
+
+        if (area.flags & MM_BREAK)
+            m_brk = iter;
+
+        auto this_iter = vaddr_range{m_pt, area.start, area.end};
+        auto other_iter = vaddr_range{other.m_pt, area.start, area.end};
+
+        while (this_iter) {
+            auto this_pte = *this_iter, other_pte = *other_iter;
+            auto attributes = other_pte.attributes();
+            auto pfn = other_pte.pfn();
+
+            attributes &= ~(PA_RW | PA_A | PA_D);
+            attributes |= PA_COW;
+            this_pte.set(attributes, pfn);
+
+            increase_refcount(pfn_to_page(pfn));
+
+            // TODO: create a function to set COW mappings
+            attributes = other_pte.attributes();
+            attributes &= ~PA_RW;
+            attributes |= PA_COW;
+            other_pte.set(attributes, pfn);
+
+            ++this_iter, ++other_iter;
+        }
+    }
+
+    __invalidate_all_tlb();
+}
+
+mm_list::~mm_list() {
+    if (!m_pt)
+        return;
+
+    clear();
+    __dealloc_page_table(m_pt);
+}
+
+bool mm_list::is_avail(uintptr_t start, std::size_t len) const noexcept {
+    start &= ~0xfff;
+    uintptr_t end = (start + len + 0xfff) & ~0xfff;
+    len = end - start;
+
+    if (end > USER_SPACE_MEMORY_TOP)
+        return false;
+
+    for (const auto& area : m_areas) {
+        if (!area.is_avail(start, end))
+            return false;
+    }
+    return true;
+}
+
+bool mm_list::is_avail(uintptr_t addr) const {
+    if (addr >= USER_SPACE_MEMORY_TOP)
+        return false;
+
+    auto iter = m_areas.find(addr);
+    return iter == m_areas.end();
+}
+
+uintptr_t mm_list::find_avail(uintptr_t hint, size_t len) const {
+    auto addr = std::max(hint, MMAP_MIN_ADDR);
+
+    while (!is_avail(addr, len)) {
+        auto iter = m_areas.lower_bound(addr);
+        if (iter == m_areas.end())
+            return 0;
+
+        addr = iter->end;
+    }
+
+    return addr;
+}
+
+void mm_list::switch_pd() const noexcept {
+    asm volatile("mov %0, %%cr3" : : "r"(m_pt) : "memory");
+}
+
+int mm_list::register_brk(uintptr_t addr) {
+    assert(m_brk == m_areas.end());
+    if (!is_avail(addr))
+        return -ENOMEM;
+
+    bool inserted;
+    std::tie(m_brk, inserted) = m_areas.emplace(addr, MM_ANONYMOUS | MM_WRITE | MM_BREAK);
+
+    assert(inserted);
+    return 0;
+}
+
+uintptr_t mm_list::set_brk(uintptr_t addr) {
+    using namespace paging;
+    assert(m_brk != m_areas.end());
+    uintptr_t curbrk = m_brk->end;
+
+    addr += 4096 - 1;
+    addr &= ~0xfff;
+
+    if (addr <= curbrk || !is_avail(curbrk, addr - curbrk))
+        return curbrk;
+
+    for (auto pte : vaddr_range{m_pt, curbrk, addr})
+        pte.set(PA_ANONYMOUS_PAGE | PA_NXE, EMPTY_PAGE_PFN);
+
+    m_brk->end = addr;
+    return m_brk->end;
+}
+
+void mm_list::clear() {
+    for (auto iter = m_areas.begin(); iter != m_areas.end(); ++iter)
+        unmap(iter, false);
+
+    __invalidate_all_tlb();
+
+    m_areas.clear();
+    m_brk = m_areas.end();
+}
+
+mm_list::iterator mm_list::split(iterator area, uintptr_t addr) {
+    assert(!(addr & 0xfff));
+    assert(addr > area->start && addr < area->end);
+
+    std::size_t old_len = addr - area->start;
+    std::size_t new_file_offset = 0;
+
+    if (area->mapped_file)
+        new_file_offset = area->file_offset + old_len;
+
+    auto new_end = area->end;
+    area->end = addr;
+
+    auto [iter, inserted] =
+        m_areas.emplace(addr, area->flags, new_end, d_get(area->mapped_file), new_file_offset);
+
+    assert(inserted);
+    return iter;
+}
+
+int mm_list::unmap(iterator area, bool should_invalidate_tlb) {
+    using namespace paging;
+
+    bool should_use_invlpg = area->end - area->start <= 0x4000;
+    auto range = vaddr_range{m_pt, area->start, area->end};
+    uintptr_t cur_addr = area->start;
+
+    // TODO: write back dirty pages
+    for (auto pte : range) {
+        free_page(pte.pfn());
+        pte.clear();
+
+        if (should_invalidate_tlb && should_use_invlpg) {
+            asm volatile("invlpg (%0)" : : "r"(cur_addr) : "memory");
+            cur_addr += 0x1000;
+        }
+    }
+
+    if (should_invalidate_tlb && !should_use_invlpg)
+        __invalidate_all_tlb();
+
+    return 0;
+}
+
+int mm_list::unmap(uintptr_t start, std::size_t length, bool should_invalidate_tlb) {
+    // standard says that addr and len MUST be
+    // page-aligned or the call is invalid
+    if (start & 0xfff)
+        return -EINVAL;
+
+    uintptr_t end = (start + length + 0xfff) & ~0xfff;
+
+    // check address validity
+    if (end > KERNEL_SPACE_START)
+        return -EINVAL;
+    if (end > USER_SPACE_MEMORY_TOP)
+        return -ENOMEM;
+
+    auto iter = m_areas.lower_bound(start);
+    auto iter_end = m_areas.upper_bound(end);
+
+    // start <= iter <= end a.k.a. !(start > *iter) && !(*iter > end)
+    while (iter != iter_end) {
+        // start == iter:
+        // start is between (iter->start, iter->end)
+        //
+        // strip out the area before start
+        if (!(start < *iter) && start != iter->start)
+            iter = split(iter, start);
+
+        // iter.end <= end
+        // it is safe to unmap the area directly
+        if (*iter < end) {
+            if (int ret = unmap(iter, should_invalidate_tlb); ret != 0)
+                return ret;
+
+            iter = m_areas.erase(iter);
+            continue;
+        }
+
+        // end == iter:
+        // end is between [iter->start, iter->end)
+        //
+        // if end == iter->start, no need to strip the area
+        if (end == iter->start) {
+            ++iter;
+            continue;
+        }
+
+        (void)split(iter, end);
+        if (int ret = unmap(iter, should_invalidate_tlb); ret != 0)
+            return ret;
+
+        iter = m_areas.erase(iter);
+
+        // no need to check areas after this
+        break;
+    }
+
+    return 0;
+}
+
+int mm_list::mmap(const map_args& args) {
+    auto& vaddr = args.vaddr;
+    auto& length = args.length;
+    auto& file = args.file;
+    auto& foff = args.file_offset;
+    auto& flags = args.flags;
+
+    assert((vaddr & 0xfff) == 0 && (foff & 0xfff) == 0);
+    assert((length & 0xfff) == 0 && length != 0);
+
+    if (!is_avail(vaddr, length))
+        return -EEXIST;
+
+    using namespace kernel::mem::paging;
+
+    // PA_RW is set during page fault while PA_NXE is preserved
+    // so we set PA_NXE now
+    psattr_t attributes = PA_US;
+    if (!(flags & MM_EXECUTE))
+        attributes |= PA_NXE;
+
+    if (flags & MM_MAPPED) {
+        assert(file);
+
+        auto [area, inserted] =
+            m_areas.emplace(vaddr, flags & ~MM_INTERNAL_MASK, vaddr + length, d_get(file), foff);
+        assert(inserted);
+
+        attributes |= PA_MMAPPED_PAGE;
+        for (auto pte : vaddr_range{m_pt, vaddr, vaddr + length})
+            pte.set(attributes, EMPTY_PAGE_PFN);
+    } else if (flags & MM_ANONYMOUS) {
+        // private mapping of zero-filled pages
+        // TODO: shared mapping
+        auto [area, inserted] = m_areas.emplace(vaddr, (flags & ~MM_INTERNAL_MASK), vaddr + length);
+        assert(inserted);
+
+        attributes |= PA_ANONYMOUS_PAGE;
+        for (auto pte : vaddr_range{m_pt, vaddr, vaddr + length})
+            pte.set(attributes, EMPTY_PAGE_PFN);
+    } else {
+        return -EINVAL;
+    }
+
+    return 0;
+}
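
For reference, a hypothetical caller of the new mapping interface (not from
this commit; assumes an existing mm_list named mm, and that map_args is
reachable as written): an anonymous, writable, non-executable user mapping
that the page fault handler later populates from EMPTY_PAGE_PFN.

    using namespace kernel::mem;

    mm_list::map_args args{}; // scope of map_args assumed; adjust to the
                              // declaration in mm_list.hpp
    args.vaddr = mm.find_avail(MMAP_MIN_ADDR, 0x4000); // hint at lowest addr
    args.length = 0x4000;                              // four pages
    args.flags = MM_ANONYMOUS | MM_WRITE;
    int ret = mm.mmap(args); // -EEXIST if the range is taken, -EINVAL if
                             // neither MM_MAPPED nor MM_ANONYMOUS is set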

+ 4 - 5
src/kernel/mem/page_table.rs

@@ -203,12 +203,11 @@ impl PageTable {
         let page = Page::alloc_one();
         page.zero();
 
+        // TODO: copy only the kernel space mappings.
         let kernel_space_page_table = CachedPP::new(KERNEL_PML4 as usize);
-        unsafe {
-            page.as_cached()
-                .as_ptr::<u8>()
-                .copy_from_nonoverlapping(kernel_space_page_table.as_ptr(), page.len())
-        };
+
+        page.as_cached().as_mut_slice::<u64>(512)[256..]
+            .copy_from_slice(&kernel_space_page_table.as_mut_slice(512)[256..]);
 
         Self { page }
     }
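
The [256..] slice is the kernel half of the PML4: a PML4 slot covers bits
47:39 of the canonical address, so kernel space at 0xffff800000000000 and
above occupies entries 256 through 511. A one-line check of that arithmetic:

    // PML4 index of the lowest upper-half canonical address.
    static_assert(((0xffff800000000000ULL >> 39) & 0x1ff) == 256);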

+ 0 - 5
src/kinit.cpp

@@ -49,9 +49,6 @@ static inline void enable_sse() {
 static inline void setup_early_kernel_page_table() {
     using namespace kernel::mem::paging;
 
-    // remove temporary mapping
-    KERNEL_PAGE_TABLE[0x000].clear();
-
     constexpr auto idx = idx_all(0xffffffffc0200000ULL);
 
     auto pdpt = KERNEL_PAGE_TABLE[std::get<1>(idx)].parse();
@@ -115,8 +112,6 @@ static inline void setup_buddy(uintptr_t addr_max) {
         mem::paging::create_zone(start, end);
     }
 
-    // free .stage1
-    create_zone(0x1000, 0x2000);
     // unused space
     create_zone(0x9000, 0x80000);
     create_zone(0x100000, 0x200000);

+ 56 - 5
src/lib.rs

@@ -23,6 +23,11 @@ mod rcu;
 mod sync;
 
 use alloc::ffi::CString;
+use core::{
+    alloc::{GlobalAlloc, Layout},
+    arch::{asm, global_asm},
+    sync::atomic::AtomicU32,
+};
 use elf::ParsedElf32;
 use kernel::{
     mem::{
@@ -65,11 +70,6 @@ extern "C" {
     fn init_pci();
 }
 
-use core::{
-    alloc::{GlobalAlloc, Layout},
-    arch::{asm, global_asm},
-};
-
 struct Allocator {}
 unsafe impl GlobalAlloc for Allocator {
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
@@ -107,6 +107,55 @@ global_asm!(
 
 extern "C" {
     fn to_init_process();
+    fn boot_semaphore();
+}
+
+fn rdmsr(msr: u32) -> u64 {
+    let edx: u32;
+    let eax: u32;
+
+    unsafe {
+        asm!(
+            "rdmsr",
+            in("ecx") msr,
+            out("eax") eax,
+            out("edx") edx,
+        )
+    };
+
+    (edx as u64) << 32 | eax as u64
+}
+
+fn bootstrap_cpus() {
+    let apic_base = rdmsr(0x1b);
+    assert_eq!(apic_base & 0x800, 0x800, "LAPIC not enabled");
+    assert_eq!(apic_base & 0x100, 0x100, "not the bootstrap processor");
+
+    let apic_base = apic_base & !0xfff;
+    println_debug!("IA32_APIC_BASE: {apic_base:#x}");
+
+    let apic_base = CachedPP::new(apic_base as usize);
+    let spurious = apic_base.offset(0xf0).as_ptr::<u32>();
+    let icr = apic_base.offset(0x300).as_ptr::<u32>();
+
+    println_debug!("SPURIOUS: {:#x}", unsafe { spurious.read() });
+
+    unsafe { icr.write_volatile(0xc4500) };
+
+    while unsafe { icr.read_volatile() } & 0x1000 != 0 {
+        unsafe { asm!("pause") };
+    }
+
+    unsafe { icr.write_volatile(0xc4601) };
+
+    while unsafe { icr.read_volatile() } & 0x1000 != 0 {
+        unsafe { asm!("pause") };
+    }
+
+    let sem = unsafe { AtomicU32::from_ptr(boot_semaphore as *mut _) };
+    while sem.load(core::sync::atomic::Ordering::Acquire) != 3 {}
+
+    println_info!("Processors startup finished");
 }
 
 #[no_mangle]
@@ -167,6 +216,8 @@ extern "C" fn init_process(early_kstack_pfn: usize) {
     fs::procfs::init();
     fs::fat32::init();
 
+    bootstrap_cpus();
+
     let (ip, sp) = {
         // mount fat32 /mnt directory
         let fs_context = FsContext::get_current();
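
A note on the two ICR writes in bootstrap_cpus, decoded per the Intel SDM
field layout (my annotation, not code from this commit): both use the
"all excluding self" destination shorthand, so every AP receives an INIT IPI
followed by a single SIPI aimed at the 0x1000 trampoline, and each busy-wait
polls the delivery-status bit (bit 12) until the IPI has been dispatched. The
final wait for boot_semaphore to reach 3 assumes three APs, i.e. a fixed
four-CPU configuration, for now.

    // 0xc4500 = "all excluding self" shorthand (0xc0000) | level assert
    //           (0x4000) | delivery mode INIT (0x500)
    constexpr uint32_t ICR_INIT_ALL = 0xc4500;
    // 0xc4601 = same shorthand and assert | delivery mode SIPI (0x600)
    //           | vector 0x01, i.e. start execution at 0x01 << 12 = 0x1000
    constexpr uint32_t ICR_SIPI_ALL = 0xc4601;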