123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440 |
- #include <assert.h>
- #include <string.h>
- #include <types/list.hpp>
- #include <kernel/async/lock.hpp>
- #include <kernel/log.hpp>
- #include <kernel/mem/mm_list.hpp>
- #include <kernel/mem/paging.hpp>
- #include <kernel/mem/slab.hpp>
- #include <kernel/mem/vm_area.hpp>
- #include <kernel/process.hpp>
- #include <kernel/procfs.hpp>
- using namespace types::list;
- using namespace kernel::async;
- using namespace kernel::mem::paging;
- static inline void __page_fault_die(uintptr_t vaddr) {
- kmsgf("[kernel] kernel panic: invalid memory access to %p", vaddr);
- freeze();
- }
- static inline PSE __parse_pse(PSE pse, bool priv) {
- auto attr = priv ? PA_KERNEL_PAGE_TABLE : PA_USER_PAGE_TABLE;
- if (!(pse.attributes() & PA_P))
- pse.set(attr, alloc_page_table());
- return pse.parse();
- }
- static struct zone_info {
- page* next;
- std::size_t count;
- } zones[52];
- static mutex zone_lock;
// Index of the most significant set bit of x, i.e. floor(log2(x)).
// Returns 0 for both x == 0 and x == 1.
constexpr unsigned _msb(std::size_t x) {
    unsigned bit = 0;
    for (x >>= 1; x != 0; x >>= 1)
        ++bit;
    return bit;
}
- constexpr pfn_t buddy(pfn_t pfn, unsigned order) {
- return pfn ^ (1 << (order + 12));
- }
- constexpr pfn_t parent(pfn_t pfn, unsigned order) {
- return pfn & ~(1 << (order + 12));
- }
- // call with zone_lock held
- static inline void _zone_list_insert(unsigned order, page* zone) {
- assert(zone->flags & PAGE_PRESENT && zone->flags & PAGE_BUDDY);
- assert((zone->flags & 0xff) == 0);
- zone->flags |= order;
- zones[order].count++;
- list_insert(&zones[order].next, zone);
- }
- // call with zone_lock held
- static inline void _zone_list_remove(unsigned order, page* zone) {
- assert(zone->flags & PAGE_PRESENT && zone->flags & PAGE_BUDDY);
- assert(zones[order].count > 0 && (zone->flags & 0xff) == order);
- zone->flags &= ~0xff;
- zones[order].count--;
- list_remove(&zones[order].next, zone);
- }
- // call with zone_lock held
- static inline page* _zone_list_get(unsigned order) {
- if (zones[order].count == 0)
- return nullptr;
- zones[order].count--;
- auto* pg = list_get(&zones[order].next);
- assert((pg->flags & 0xff) == order);
- return pg;
- }
- // where order represents power of 2
- // call with zone_lock held
- static inline page* _create_zone(pfn_t pfn, unsigned order) {
- page* zone = pfn_to_page(pfn);
- assert(zone->flags & PAGE_PRESENT);
- zone->flags |= PAGE_BUDDY;
- _zone_list_insert(order, zone);
- return zone;
- }
- // call with zone_lock held
- static inline void _split_zone(page* zone, unsigned order,
- unsigned target_order) {
- while (order > target_order) {
- pfn_t pfn = page_to_pfn(zone);
- _create_zone(buddy(pfn, order - 1), order - 1);
- order--;
- }
- zone->flags &= ~0xff;
- zone->flags |= target_order;
- }
- // call with zone_lock held
- static inline page* _alloc_zone(unsigned order) {
- for (unsigned i = order; i < 52; ++i) {
- auto zone = _zone_list_get(i);
- if (!zone)
- continue;
- increase_refcount(zone);
- if (i > order)
- _split_zone(zone, i, order);
- assert(zone->flags & PAGE_PRESENT && zone->flags & PAGE_BUDDY);
- return zone;
- }
- return nullptr;
- }
- void kernel::mem::paging::create_zone(uintptr_t start, uintptr_t end) {
- start += (4096 - 1);
- start >>= 12;
- end >>= 12;
- if (start >= end)
- return;
- lock_guard_irq lock{zone_lock};
- unsigned long low = start;
- for (unsigned i = 0; i < _msb(end); ++i, low >>= 1) {
- if (!(low & 1))
- continue;
- _create_zone(low << (12 + i), i);
- low++;
- }
- low = 1 << _msb(end);
- while (low < end) {
- unsigned order = _msb(end - low);
- _create_zone(low << 12, order);
- low |= (1 << order);
- }
- }
- void kernel::mem::paging::mark_present(uintptr_t start, uintptr_t end) {
- start >>= 12;
- end += (4096 - 1);
- end >>= 12;
- while (start < end)
- PAGE_ARRAY[start++].flags |= PAGE_PRESENT;
- }
- page* kernel::mem::paging::alloc_pages(unsigned order) {
- lock_guard_irq lock{zone_lock};
- auto* zone = _alloc_zone(order);
- if (!zone)
- freeze();
- return zone;
- }
- page* kernel::mem::paging::alloc_page() {
- return alloc_pages(0);
- }
- pfn_t kernel::mem::paging::alloc_page_table() {
- page* zone = alloc_page();
- pfn_t pfn = page_to_pfn(zone);
- memset(physaddr<void>{pfn}, 0x00, 0x1000);
- return pfn;
- }
- void kernel::mem::paging::free_pages(page* pg, unsigned order) {
- assert((pg->flags & 0xff) == order);
- // TODO: atomic
- if (!(pg->flags & PAGE_BUDDY) || --pg->refcount)
- return;
- lock_guard_irq lock{zone_lock};
- while (order < 52) {
- pfn_t pfn = page_to_pfn(pg);
- pfn_t buddy_pfn = buddy(pfn, order);
- page* buddy_page = pfn_to_page(buddy_pfn);
- if (!(buddy_page->flags & PAGE_BUDDY))
- break;
- if ((buddy_page->flags & 0xff) != order)
- break;
- if (buddy_page->refcount)
- break;
- _zone_list_remove(order, buddy_page);
- if (buddy_page < pg)
- std::swap(buddy_page, pg);
- buddy_page->flags &= ~(PAGE_BUDDY | 0xff);
- order++;
- }
- pg->flags &= ~0xff;
- _zone_list_insert(order, pg);
- }
- void kernel::mem::paging::free_page(page* page) {
- return free_pages(page, 0);
- }
- void kernel::mem::paging::free_pages(pfn_t pfn, unsigned order) {
- return free_pages(pfn_to_page(pfn), order);
- }
- void kernel::mem::paging::free_page(pfn_t pfn) {
- return free_page(pfn_to_page(pfn));
- }
- pfn_t kernel::mem::paging::page_to_pfn(page* _page) {
- return (pfn_t)(_page - PAGE_ARRAY) * 0x1000;
- }
- page* kernel::mem::paging::pfn_to_page(pfn_t pfn) {
- return PAGE_ARRAY + pfn / 0x1000;
- }
- void kernel::mem::paging::increase_refcount(page* pg) {
- pg->refcount++;
- }
- void kernel::mem::paging::handle_page_fault(unsigned long err) {
- using namespace kernel::mem;
- using namespace paging;
- uintptr_t vaddr;
- asm volatile("mov %%cr2, %0" : "=g"(vaddr) : :);
- auto& mms = current_process->mms;
- auto* mm_area = mms.find(vaddr);
- if (!mm_area) [[unlikely]] {
- // user access to address that does not exist
- if (err & PAGE_FAULT_U)
- kill_current(SIGSEGV);
- __page_fault_die(vaddr);
- }
- // user access to a present page caused the fault
- // check access rights
- if (err & PAGE_FAULT_U && err & PAGE_FAULT_P) {
- // write to read only pages
- if (err & PAGE_FAULT_W && !(mm_area->flags & MM_WRITE))
- kill_current(SIGSEGV);
- // execute from non-executable pages
- if (err & PAGE_FAULT_I && !(mm_area->flags & MM_EXECUTE))
- kill_current(SIGSEGV);
- }
- auto idx = idx_all(vaddr);
- auto pe = mms.get_page_table()[std::get<1>(idx)];
- assert(pe.attributes() & PA_P);
- pe = pe.parse()[std::get<2>(idx)];
- assert(pe.attributes() & PA_P);
- pe = pe.parse()[std::get<3>(idx)];
- assert(pe.attributes() & PA_P);
- pe = pe.parse()[std::get<4>(idx)];
- bool mmapped = mm_area->flags & MM_MAPPED;
- assert(!mmapped || mm_area->mapped_file);
- if (!(err & PAGE_FAULT_P) && !mmapped) [[unlikely]]
- __page_fault_die(vaddr);
- pfn_t pfn = pe.pfn();
- auto attr = pe.attributes();
- page* pg = pfn_to_page(pfn);
- if (attr & PA_COW) {
- attr &= ~PA_COW;
- if (mm_area->flags & MM_WRITE)
- attr |= PA_RW;
- else
- attr &= ~PA_RW;
- // if it is a dying page
- // TODO: use atomic
- if (pg->refcount == 1) {
- pe.set(attr, pfn);
- return;
- }
- // duplicate the page
- page* new_page = alloc_page();
- pfn_t new_pfn = page_to_pfn(new_page);
- physaddr<void> new_page_addr{new_pfn};
- if (attr & PA_ANON)
- memset(new_page_addr, 0x00, 0x1000);
- else
- memcpy(new_page_addr, physaddr<void>{pfn}, 0x1000);
- attr &= ~(PA_A | PA_ANON);
- --pg->refcount;
- pe.set(attr, new_pfn);
- pfn = new_pfn;
- }
- if (attr & PA_MMAP) {
- attr |= PA_P;
- size_t offset = (vaddr & ~0xfff) - mm_area->start;
- char* data = physaddr<char>{pfn};
- int n = fs::read(mm_area->mapped_file, data, 4096,
- mm_area->file_offset + offset, 4096);
- // TODO: send SIGBUS if offset is greater than real size
- if (n != 4096)
- memset(data + n, 0x00, 4096 - n);
- // TODO: shared mapping
- attr &= ~PA_MMAP;
- pe.set(attr, pfn);
- }
- }
- vaddr_range::vaddr_range(pfn_t pt, uintptr_t start, uintptr_t end, bool priv)
- : n{start >= end ? 0 : ((end - start) >> 12)}
- , idx4{!n ? 0 : idx_p4(start)}
- , idx3{!n ? 0 : idx_p3(start)}
- , idx2{!n ? 0 : idx_p2(start)}
- , idx1{!n ? 0 : idx_p1(start)}
- , pml4{!n ? PSE{0} : PSE{pt}}
- , pdpt{!n ? PSE{0} : __parse_pse(pml4[idx4], priv)}
- , pd{!n ? PSE{0} : __parse_pse(pdpt[idx3], priv)}
- , pt{!n ? PSE{0} : __parse_pse(pd[idx2], priv)}
- , m_start{!n ? 0 : start}
- , m_end{!n ? 0 : end}
- , is_privilege{!n ? false : priv} {}
- vaddr_range::vaddr_range(std::nullptr_t)
- : n{}
- , idx4{}
- , idx3{}
- , idx2{}
- , idx1{}
- , pml4{0}
- , pdpt{0}
- , pd{0}
- , pt{0}
- , m_start{}
- , m_end{}
- , is_privilege{} {}
- vaddr_range vaddr_range::begin() const noexcept {
- return *this;
- }
- vaddr_range vaddr_range::end() const noexcept {
- return vaddr_range{nullptr};
- }
- PSE vaddr_range::operator*() const noexcept {
- return pt[idx1];
- }
- vaddr_range& vaddr_range::operator++() {
- --n;
- if ((idx1 = (idx1 + 1) % 512) != 0)
- return *this;
- do {
- if ((idx2 = (idx2 + 1) % 512) != 0)
- break;
- do {
- if ((idx3 = (idx3 + 1) % 512) != 0)
- break;
- idx4 = (idx4 + 1) % 512;
- // if idx4 is 0 after update, we have an overflow
- assert(idx4 != 0);
- pdpt = __parse_pse(pml4[idx4], is_privilege);
- } while (false);
- pd = __parse_pse(pdpt[idx3], is_privilege);
- } while (false);
- pt = __parse_pse(pd[idx2], is_privilege);
- return *this;
- }
- vaddr_range::operator bool() const noexcept {
- return n;
- }
- bool vaddr_range::operator==(const vaddr_range& other) const noexcept {
- return n == other.n;
- }
- extern "C" isize real_dump_buddy(const zone_info* zones, u8* buf,
- usize buf_size);
- static isize _dump_buddy(u8* buf, usize buf_size) {
- return real_dump_buddy(zones, buf, buf_size);
- }
- static void _init_procfs_files() {
- auto* root = kernel::procfs::root();
- kernel::procfs::create(root, "buddyinfo", _dump_buddy, nullptr);
- }
- __attribute__((used))
- SECTION(".late_init") void (*const _paging_late_init)() = _init_procfs_files;
|