Forráskód Böngészése

feat(ahci): add ahci controller driver

greatbridf 1 éve
szülő
commit
b4ec221a53

+ 5 - 2
CMakeLists.txt

@@ -43,10 +43,12 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         src/kernel/tty.cpp
                         src/kernel/syscall.cpp
                         src/kernel/mem.cpp
+                        src/kernel/module.cc
                         src/kernel/vfs.cpp
                         src/kernel/vga.cpp
-                        src/kernel/hw/ata.cpp
+                        src/kernel/hw/ahci.cc
                         src/kernel/hw/keyboard.cpp
+                        src/kernel/hw/pci.cc
                         src/kernel/hw/serial.cpp
                         src/kernel/hw/timer.c
                         src/kernel/event/event.cpp
@@ -67,11 +69,12 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         include/kernel/syscall.hpp
                         include/kernel/mem.h
                         include/kernel/mm.hpp
+                        include/kernel/module.hpp
                         include/kernel/vfs.hpp
                         include/kernel/vga.hpp
                         include/kernel/signal.hpp
-                        include/kernel/hw/ata.hpp
                         include/kernel/hw/keyboard.h
+                        include/kernel/hw/pci.hpp
                         include/kernel/hw/port.hpp
                         include/kernel/hw/serial.h
                         include/kernel/hw/timer.h

+ 1 - 0
include/kernel/errno.h

@@ -19,6 +19,7 @@ extern uint32_t* _get_errno(void);
 #define ENOENT 2
 #define ESRCH 3
 #define EINTR 4
+#define EIO 5
 #define EBADF 9
 #define ECHILD 10
 #define ENOMEM 12

+ 0 - 61
include/kernel/hw/ata.hpp

@@ -1,61 +0,0 @@
-#pragma once
-
-#include <asm/port_io.h>
-#include <kernel/hw/port.hpp>
-#include <kernel/syscall.hpp>
-#include <types/cplusplus.hpp>
-
-constexpr port_id_t ATA_PRIMARY_BUS_BASE = 0x1f0;
-constexpr port_id_t ATA_PRIMARY_BUS_DEV_CONTROL_OR_ALTER_STATUS = 0x1f0;
-
-constexpr port_id_t ATA_SECONDARY_BUS_BASE = 0x170;
-constexpr port_id_t ATA_SECONDARY_BUS_DEV_CONTROL_OR_ALTER_STATUS = 0x1f0;
-
-namespace hw {
-class ata {
-public:
-    union stat_t {
-        uint8_t v;
-        struct {
-            uint8_t err : 1;
-            uint8_t idx : 1;
-            uint8_t corr : 1;
-            uint8_t drq : 1;
-            uint8_t srv : 1;
-            uint8_t df : 1;
-            uint8_t rdy : 1;
-            uint8_t bsy : 1;
-        } in;
-    };
-
-private:
-    p16 data;
-    p16r error;
-    p16w feats;
-    p8 count;
-    p8 lbalo;
-    p8 lbami;
-    p8 lbahi;
-    p8 drive;
-    p8r stats;
-    p8w comms;
-
-    uint8_t slave_flag;
-
-public:
-    ata(port_id_t port_base);
-
-    stat_t status(void) const;
-
-    bool identify(void) const;
-    int select(bool master);
-
-    size_t read_data(char* buf, size_t n) const;
-    size_t write_data(const char* buf, size_t n) const;
-
-    int read_sector(char* buf, uint32_t lba_low, uint16_t lba_high) const;
-    int write_sector(const char* buf, uint32_t lba_low, uint16_t lba_high) const;
-};
-
-void init_ata(void);
-} // namespace hw

+ 98 - 0
include/kernel/hw/pci.hpp

@@ -0,0 +1,98 @@
+#pragma once
+
+#include <functional>
+
+#include <stdint.h>
+
+namespace kernel::kinit {
+
+void init_pci();
+
+} // namespace kernel::kinit
+
+namespace kernel::hw::pci {
+
+struct bar_mmio {
+    uint32_t always_zero : 1;
+    uint32_t type : 2;
+    uint32_t prefetchable : 1;
+    uint32_t base_address : 28;
+};
+
+struct bar_io {
+    uint32_t always_one : 1;
+    uint32_t reserved : 1;
+    uint32_t base_address : 30;
+};
+
+union bar {
+    bar_mmio mmio;
+    bar_io io;
+};
+
+// Accessor for the configuration registers of one PCI function, identified
+// by bus / device / function number.
+struct config_reg {
+    // 0x80000000 | bus << 16 | dev << 11 | func << 8; the register offset is
+    // OR-ed into the low byte on each read.
+    // NOTE(review): bit 31 is presumably the config-space enable bit — confirm.
+    uint32_t addr_base;
+
+    // The arguments are shifted into place without masking, so callers must
+    // pass values that fit their fields.
+    explicit constexpr config_reg(
+        uint32_t bus, uint32_t dev, uint32_t func)
+        : addr_base(0x80000000U | (bus << 16) | (dev << 11) | (func << 8))
+    { }
+
+    // offset is in range from 0x00 to 0xff
+    uint32_t read32(uint32_t offset) const;
+    uint16_t read16(uint16_t offset) const;
+
+    // read n-th 32-bit register
+    uint32_t operator[](uint32_t n) const;
+};
+
+struct device_header_base {
+    uint16_t vendor;
+    uint16_t device;
+    uint16_t command;
+    uint16_t status;
+    uint8_t revision_id;
+    uint8_t prog_if;
+    uint8_t subclass;
+    uint8_t class_code;
+    uint8_t cache_line_size;
+    uint8_t latency_timer;
+    uint8_t header_type;
+    uint8_t bist;
+};
+
+struct device_header_type0 {
+    bar bars[6];
+    uint32_t cardbus_cis_pointer;
+    uint16_t subsystem_vendor_id;
+    uint16_t subsystem_id;
+    uint32_t expansion_rom_base_address;
+    uint8_t capabilities_pointer;
+    uint8_t reserved[7];
+    uint8_t interrupt_line;
+    uint8_t interrupt_pin;
+    uint8_t min_grant;
+    uint8_t max_latency;
+};
+
+// Identifying fields of one PCI function together with the configuration
+// register accessor it was constructed from.
+class pci_device {
+public:
+    // accessor for this function's configuration registers
+    config_reg reg;
+
+    uint16_t vendor;
+    uint16_t device;
+
+    uint8_t revision_id;
+    uint8_t subclass;
+    uint8_t class_code;
+    uint8_t header_type;
+
+    // Takes the accessor by value; the constructor is defined out of line.
+    explicit pci_device(config_reg reg);
+};
+
+using driver_t = std::function<int(pci_device*)>;
+
+pci_device* probe_device(uint8_t bus, uint8_t device, uint8_t function);
+int register_driver(uint16_t vendor, uint16_t device, driver_t drv);
+
+} // namespace kernel::hw::pci

+ 85 - 62
include/kernel/hw/port.hpp

@@ -1,71 +1,94 @@
 #pragma once
 
 #include <stdint.h>
-#include <type_traits>
 
-namespace hw {
-template <typename port_size_t, bool r = true, bool w = true>
-class port {
-private:
+namespace kernel::hw {
+
+// Read a 32-bit value from I/O port `pn`.
+//
+// The asm must be `volatile`: a port read talks to a device, and an asm with
+// output operands that is not volatile may be merged with an identical one or
+// removed by the optimizer (e.g. two back-to-back reads of the same port).
+inline uint32_t inl(uint16_t pn)
+{
+    uint32_t ret;
+    asm volatile("inl %1, %0"
+        : "=a"(ret)
+        : "d"(pn));
+    return ret;
+}
+
+// Write the 32-bit value `n` to I/O port `pn`; returns `n` unchanged so the
+// call can be used as the result of an assignment-style operator.
+inline uint32_t outl(uint16_t pn, uint32_t n)
+{
+    asm("outl %1, %0"
+        :
+        : "d"(pn), "a"(n));
+    return n;
+}
+
+// Read a 16-bit value from I/O port `pn`.
+//
+// The asm must be `volatile`: a port read talks to a device, and an asm with
+// output operands that is not volatile may be merged with an identical one or
+// removed by the optimizer.
+inline uint16_t inw(uint16_t pn)
+{
+    uint16_t ret;
+    asm volatile("inw %1, %0"
+        : "=a"(ret)
+        : "d"(pn));
+    return ret;
+}
+
+// Write the 16-bit value `n` to I/O port `pn`; returns `n` unchanged.
+inline uint16_t outw(uint16_t pn, uint16_t n)
+{
+    asm("outw %1, %0"
+        :
+        : "d"(pn), "a"(n));
+    return n;
+}
+
+// Read an 8-bit value from I/O port `pn`.
+//
+// The asm must be `volatile`: a port read talks to a device, and an asm with
+// output operands that is not volatile may be merged with an identical one or
+// removed by the optimizer (e.g. when polling a status port in a loop).
+inline uint8_t inb(uint16_t pn)
+{
+    uint8_t ret;
+    asm volatile("inb %1, %0"
+        : "=a"(ret)
+        : "d"(pn));
+    return ret;
+}
+
+// Write the 8-bit value `n` to I/O port `pn`; returns `n` unchanged.
+inline uint8_t outb(uint16_t pn, uint8_t n)
+{
+    asm("outb %1, %0"
+        :
+        : "d"(pn), "a"(n));
+    return n;
+}
+
+// Handle for a 32-bit I/O port: `*p` reads the port, `p = v` writes it.
+struct p32 {
+    // port number
+    uint16_t mp;
+
+    explicit constexpr p32(uint16_t p) : mp(p) { }
+    // Read the port through inl().
+    inline uint32_t operator*() const { return inl(mp); }
+    // Write `n` to the port through outl(); const-qualified, so it also works
+    // on const/constexpr port objects. Returns the value written.
+    inline uint32_t operator=(uint32_t n) const { return outl(mp, n); }
+};
+
+// Handle for a 16-bit I/O port: `*p` reads the port, `p = v` writes it.
+struct p16 {
+    // port number
+    uint16_t mp;
+
+    explicit constexpr p16(uint16_t p) : mp(p) { }
+    // Read the port through inw().
+    inline uint16_t operator*() const { return inw(mp); }
+    // Write `n` to the port through outw(); const-qualified, so it also works
+    // on const/constexpr port objects. Returns the value written.
+    inline uint16_t operator=(uint16_t n) const { return outw(mp, n); }
+};
+
+struct p8 {
     uint16_t mp;
 
-public:
-    explicit port(uint16_t p)
-        : mp(p)
-    {
-    }
-
-    port_size_t operator*(void) const
-    {
-        static_assert(
-            std::is_same_v<port_size_t, uint8_t> || std::is_same_v<port_size_t, uint16_t>,
-            "this type is not implemented yet.");
-        port_size_t ret;
-        if constexpr (std::is_same_v<port_size_t, uint8_t>)
-            asm volatile(
-                "inb %1, %0"
-                : "=a"(ret)
-                : "d"(mp));
-        if constexpr (std::is_same_v<port_size_t, uint16_t>)
-            asm volatile(
-                "inw %1, %0"
-                : "=a"(ret)
-                : "d"(mp));
-        return ret;
-    }
-
-    port_size_t operator=(port_size_t n) const
-    {
-        static_assert(
-            std::is_same_v<port_size_t, uint8_t> || std::is_same_v<port_size_t, uint16_t>,
-            "this type is not implemented yet.");
-        if constexpr (std::is_same_v<port_size_t, uint8_t>)
-            asm volatile(
-                "outb %1, %0"
-                :
-                : "d"(mp), "a"(n));
-        if constexpr (std::is_same_v<port_size_t, uint16_t>)
-            asm volatile(
-                "outw %1, %0"
-                :
-                : "d"(mp), "a"(n));
-        return n;
-    }
+    explicit constexpr p8(uint16_t p) : mp(p) { }
+    inline uint8_t operator*() const { return inb(mp); }
+    inline uint8_t operator=(uint8_t n) const { return outb(mp, n); }
 };
 
-using p8 = port<uint8_t>;
-using p8r = port<uint8_t, true, false>;
-using p8w = port<uint8_t, false, true>;
-using p16 = port<uint16_t>;
-using p16r = port<uint16_t, true, false>;
-using p16w = port<uint16_t, false, true>;
-
-template <>
-uint8_t p8r::operator=(uint8_t n) const = delete;
-template <>
-uint8_t p8w::operator*(void) const = delete;
-template <>
-uint16_t p16r::operator=(uint16_t n) const = delete;
-template <>
-uint16_t p16w::operator*(void) const = delete;
+} // namespace kernel::hw
+
+namespace hw {
+
+// for backward compatibility
+using p8 = kernel::hw::p8;
+using p8r = kernel::hw::p8;
+using p8w = kernel::hw::p8;
+using p16 = kernel::hw::p16;
+using p16r = kernel::hw::p16;
+using p16w = kernel::hw::p16;
+
 } // namespace hw

+ 3 - 3
include/kernel/mm.hpp

@@ -110,7 +110,7 @@ void __free_raw_page(page_t pg);
 
 namespace kernel {
 
-void* pmap(page_t pg);
+void* pmap(page_t pg, bool cached = true);
 void pfree(page_t pg);
 
 class paccess : public types::non_copyable {
@@ -123,10 +123,10 @@ public:
     paccess(paccess&&) = delete;
     paccess& operator=(paccess&&) = delete;
 
-    constexpr explicit paccess(page_t pg)
+    constexpr explicit paccess(page_t pg, bool cached = true)
         : m_pg(pg)
     {
-        m_ptr = pmap(pg);
+        m_ptr = pmap(pg, cached);
     }
     constexpr void* ptr(void) const
     {

+ 34 - 0
include/kernel/module.hpp

@@ -0,0 +1,34 @@
+#pragma once
+
+#include <types/types.h>
+
+// Define a constant module_loader named `name`, initialised with `func`, and
+// place it in the ".kmods" section. The `used` attribute keeps the variable
+// in the object file even though nothing references it by name.
+#define INTERNAL_MODULE(name, func) \
+    SECTION(".kmods") __attribute__((used)) \
+    kernel::module::module_loader const name = (func)
+
+namespace kernel::module {
+
+// Abstract base class of a kernel module: a named, non-copyable object with
+// an init() hook that concrete modules override.
+struct module {
+    // module name; the pointer is stored, not the characters
+    const char* const name;
+
+    explicit module(const char* name);
+
+    virtual ~module() = default;
+    module(const module&) = delete;
+    module& operator=(const module&) = delete;
+
+    // Initialise the module.
+    // NOTE(review): presumably returns one of the MODULE_* codes — confirm
+    // against the caller.
+    virtual int init() = 0;
+};
+
+using module_loader = module* (*)();
+
+constexpr int MODULE_SUCCESS = 0;
+constexpr int MODULE_FAILED = 1;
+constexpr int MODULE_DELAYED = 2;
+
+// TODO: unique_ptr and Deleter
+int insmod(module* mod);
+
+extern "C" module_loader kmod_loaders_start[];
+
+} // namespace kernel::module

+ 38 - 26
include/kernel/vfs.hpp

@@ -55,29 +55,37 @@ struct inode {
     gid_t gid;
 };
 
-#define SN_INVALID (0xffffffff)
-union node_t {
-    uint32_t v;
-    struct {
-        uint32_t major : 16;
-        uint32_t minor : 16;
-    } in;
-};
+using node_t = uint32_t;
+
+#define NODE_MAJOR(node) ((node) >> 16)
+#define NODE_MINOR(node) ((node) & 0xffff)
+constexpr node_t NODE_INVALID = -1U;
 
-struct special_node;
+// Pack a device number: `major` shifted into the upper 16 bits, the low
+// 16 bits of `minor` in the lower half.
+constexpr node_t make_node(uint32_t major, uint32_t minor)
+{
+    const uint32_t upper = major << 16;
+    const uint32_t lower = minor & 0xffff;
+    return upper | lower;
+}
 
-typedef size_t (*special_node_read)(special_node* sn, char* buf, size_t buf_size, size_t offset, size_t n);
-typedef size_t (*special_node_write)(special_node* sn, const char* buf, size_t offset, size_t n);
+// buf, buf_size, offset, cnt
+using blkdev_read = std::function<ssize_t(char*, std::size_t, std::size_t, std::size_t)>;
 
-struct special_node_ops {
-    special_node_read read;
-    special_node_write write;
+// buf, offset, cnt
+using blkdev_write = std::function<ssize_t(const char*, std::size_t, std::size_t)>;
+
+struct blkdev_ops {
+    blkdev_read read;
+    blkdev_write write;
 };
 
-struct special_node {
-    special_node_ops ops;
-    uint32_t data1;
-    uint32_t data2;
+// buf, buf_size, cnt
+using chrdev_read = std::function<ssize_t(char*, std::size_t, std::size_t)>;
+
+// buf, cnt
+using chrdev_write = std::function<ssize_t(const char*, std::size_t)>;
+
+struct chrdev_ops {
+    chrdev_read read;
+    chrdev_write write;
 };
 
 struct PACKED user_dirent {
@@ -203,7 +211,7 @@ public:
     virtual size_t inode_read(inode* file, char* buf, size_t buf_size, size_t offset, size_t n);
     virtual size_t inode_write(inode* file, const char* buf, size_t offset, size_t n);
     virtual int inode_mkfile(dentry* dir, const char* filename, mode_t mode);
-    virtual int inode_mknode(dentry* dir, const char* filename, union node_t sn);
+    virtual int inode_mknode(dentry* dir, const char* filename, mode_t mode, node_t sn);
     virtual int inode_rmfile(dentry* dir, const char* filename);
     virtual int inode_mkdir(dentry* dir, const char* dirname);
     virtual int inode_stat(dentry* dent, statx* buf, unsigned int mask);
@@ -308,19 +316,23 @@ struct fifo_file : public virtual file {
 
 inline fs::vfs::dentry* fs_root;
 
-void register_special_block(uint16_t major,
-    uint16_t minor,
-    special_node_read read,
-    special_node_write write,
-    uint32_t data1,
-    uint32_t data2);
+int register_block_device(node_t node, blkdev_ops ops);
+int register_char_device(node_t node, chrdev_ops ops);
+
+void partprobe();
+
+ssize_t block_device_read(node_t node, char* buf, size_t buf_size, size_t offset, size_t n);
+ssize_t block_device_write(node_t node, const char* buf, size_t offset, size_t n);
+
+ssize_t char_device_read(node_t node, char* buf, size_t buf_size, size_t n);
+ssize_t char_device_write(node_t node, const char* buf, size_t n);
 
 vfs* register_fs(vfs* fs);
 
 size_t vfs_read(inode* file, char* buf, size_t buf_size, size_t offset, size_t n);
 size_t vfs_write(inode* file, const char* buf, size_t offset, size_t n);
 int vfs_mkfile(fs::vfs::dentry* dir, const char* filename, mode_t mode);
-int vfs_mknode(fs::vfs::dentry* dir, const char* filename, node_t sn);
+int vfs_mknode(fs::vfs::dentry* dir, const char* filename, mode_t mode, node_t sn);
 int vfs_rmfile(fs::vfs::dentry* dir, const char* filename);
 int vfs_mkdir(fs::vfs::dentry* dir, const char* dirname);
 int vfs_stat(fs::vfs::dentry* dent, statx* stat, unsigned int mask);

+ 7 - 0
src/kernel.ld

@@ -76,6 +76,13 @@ SECTIONS
         *(.rodata)
         *(.rodata*)
 
+        kmod_loaders_start = .;
+
+        *(.kmods)
+
+        __kmod_loaders_end = .;
+        LONG(0);
+
         . = ALIGN(16);
 
         bss_addr = .;

+ 527 - 0
src/kernel/hw/ahci.cc

@@ -0,0 +1,527 @@
+#include "kernel/vfs.hpp"
+#include <vector>
+#include <cstddef>
+#include <algorithm>
+
+#include <kernel/log.hpp>
+#include <kernel/mm.hpp>
+#include <kernel/module.hpp>
+#include <kernel/hw/pci.hpp>
+#include <kernel/irq.hpp>
+#include <kernel/errno.h>
+
+#include <types/size.h>
+
+#include <stdint.h>
+
+// Bounded busy-wait: resets `spin` to 0, then increments it for as long as
+// `cond` holds, giving up once it reaches MAX_SPINS.
+// The expansion ends in a dangling `if ((spin) == MAX_SPINS)`, so the
+// statement written directly after SPIN(...) runs only on timeout.
+// The macro expands to several statements and therefore must not be used as
+// the body of an unbraced if / for / while.
+#define SPIN(cond, spin) \
+    (spin) = 0; \
+    while ((cond) && (spin) < MAX_SPINS) ++(spin); \
+    if ((spin) == MAX_SPINS)
+
+using namespace kernel::module;
+using namespace kernel::hw::pci;
+
+constexpr uint32_t MAX_SPINS = 100000;
+
+constexpr uint16_t VENDOR_INTEL = 0x8086;
+constexpr uint16_t DEVICE_AHCI = 0x2922;
+
+constexpr uint32_t PCI_REG_ABAR = 0x09;
+
+// Bits of the ATA status byte, which the HBA mirrors in the low 8 bits of the
+// port's task file data register.
+constexpr uint32_t ATA_DEV_BSY = 0x80; // bit 7: device busy
+constexpr uint32_t ATA_DEV_DRQ = 0x08; // bit 3: data transfer requested
+
+constexpr uint32_t PORT_CMD_ST = 0x00000001;
+constexpr uint32_t PORT_CMD_FRE = 0x00000010;
+constexpr uint32_t PORT_CMD_FR = 0x00004000;
+constexpr uint32_t PORT_CMD_CR = 0x00008000;
+
+namespace ahci {
+
+typedef volatile struct hba_port_t {
+    uint32_t command_list_base;
+    uint32_t command_list_base_upper;
+
+    uint32_t fis_base;
+    uint32_t fis_base_upper;
+
+    uint32_t interrupt_status;
+    uint32_t interrupt_enable;
+
+    uint32_t command_status;
+
+    uint32_t : 32; // reserved
+
+    uint32_t task_file_data;
+    uint32_t signature;
+
+    uint32_t sata_status;
+    uint32_t sata_control;
+    uint32_t sata_error;
+    uint32_t sata_active;
+
+    uint32_t command_issue;
+    uint32_t sata_notification;
+
+    uint32_t fis_based_switch_control;
+
+    uint32_t reserved[11];
+    uint32_t vendor[4];
+} hba_port;
+
+typedef volatile struct hba_ghc_t {
+    uint32_t capabilities;
+    uint32_t global_host_control;
+    uint32_t interrupt_status;
+    uint32_t ports_implemented;
+    uint32_t version;
+    uint32_t command_completion_coalescing_control;
+    uint32_t command_completion_coalescing_ports;
+    uint32_t enclosure_management_location;
+    uint32_t enclosure_management_control;
+    uint32_t host_capabilities_extended;
+    uint32_t bios_handoff_control_status;
+    uint8_t reserved[0xa0 - 0x2c];
+    uint8_t vendor[0x100 - 0xa0];
+} hba_ghc;
+
+struct command_header {
+    uint8_t command_fis_length : 5;
+    uint8_t atapi : 1;
+    uint8_t write : 1;
+    uint8_t prefetchable : 1;
+
+    uint8_t reset : 1;
+    uint8_t bist : 1;
+    uint8_t volatile clear_busy_upon_ok : 1;
+    uint8_t reserved0 : 1;
+    uint8_t port_multiplier : 4;
+
+    uint16_t prdt_length;
+
+    uint32_t volatile bytes_transferred;
+
+    uint32_t command_table_base;
+    uint32_t command_table_base_upper;
+
+    uint32_t reserved1[4];
+};
+
+enum fis_type {
+    FIS_REG_H2D = 0x27,
+    FIS_REG_D2H = 0x34,
+    FIS_DMA_ACT = 0x39,
+    FIS_DMA_SETUP = 0x41,
+    FIS_DATA = 0x46,
+    FIS_BIST = 0x58,
+    FIS_PIO_SETUP = 0x5f,
+    FIS_DEV_BITS = 0xa1,
+};
+
+struct fis_reg_h2d {
+    uint8_t fis_type;
+
+    uint8_t pm_port : 4;
+    uint8_t : 3; // reserved
+    uint8_t is_command : 1;
+
+    uint8_t command;
+    uint8_t feature;
+
+    uint8_t lba0;
+    uint8_t lba1;
+    uint8_t lba2;
+    uint8_t device;
+
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t feature_high;
+
+    uint16_t count;
+    uint8_t iso_command_completion;
+    uint8_t control_register;
+
+    uint8_t reserved[4];
+};
+
+struct fis_reg_d2h {
+    uint8_t fis_type;
+
+    uint8_t pm_port : 4;
+    uint8_t : 2; // reserved
+    uint8_t interrupt : 1;
+    uint8_t : 1; // reserved
+
+    uint8_t status;
+    uint8_t error;
+
+    uint8_t lba0;
+    uint8_t lba1;
+    uint8_t lba2;
+    uint8_t device;
+
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t : 8; // reserved
+
+    uint16_t count;
+    uint8_t reserved1[2];
+
+    uint8_t reserved2[4];
+};
+
+struct fis_pio_setup {
+    uint8_t fis_type;
+
+    uint8_t pm_port : 4;
+    uint8_t : 1; // reserved
+    uint8_t data_transfer_direction : 1; // device to host if set
+    uint8_t interrupt : 1;
+    uint8_t : 1; // reserved
+
+    uint8_t status;
+    uint8_t error;
+
+    uint8_t lba0;
+    uint8_t lba1;
+    uint8_t lba2;
+    uint8_t device;
+
+    uint8_t lba3;
+    uint8_t lba4;
+    uint8_t lba5;
+    uint8_t : 8; // reserved
+
+    uint16_t count;
+    uint8_t reserved1;
+    uint8_t new_status;
+
+    uint16_t transfer_count;
+    uint8_t reserved2[2];
+};
+
+struct received_fis {
+    uint8_t fis_dma_setup[32]; // we don't care about it for now
+
+    fis_pio_setup fispio;
+    uint8_t padding[12];
+
+    fis_reg_d2h fisreg;
+    uint8_t padding2[4];
+
+    uint8_t fissdb[8];
+
+    uint8_t ufis[64];
+
+    uint8_t reserved[96];
+};
+
+struct prdt_entry {
+    uint32_t data_base;
+    uint32_t data_base_upper;
+
+    uint32_t reserved0;
+
+    uint32_t byte_count : 22;
+    uint32_t reserved1 : 9;
+    uint32_t interrupt : 1;
+};
+
+struct command_table {
+    fis_reg_h2d command_fis;
+
+    uint8_t reserved1[44];
+
+    uint8_t atapi_command[16];
+
+    uint8_t reserved2[48];
+
+    prdt_entry prdt[];
+};
+
+// Clear the ST and FRE bits in the port's command register, then poll until
+// the CR and FR bits are both clear.
+// Returns 0 on success, -1 if they are still set after MAX_SPINS polls.
+static int stop_command(hba_port* port)
+{
+    const uint32_t halted = port->command_status & ~(PORT_CMD_ST | PORT_CMD_FRE);
+    port->command_status = halted;
+
+    uint32_t polls = 0;
+    while ((port->command_status & (PORT_CMD_CR | PORT_CMD_FR)) && polls < MAX_SPINS)
+        ++polls;
+
+    return polls == MAX_SPINS ? -1 : 0;
+}
+
+// Wait for the CR bit of the port's command register to clear, then set FRE
+// followed by ST (two separate read-modify-write accesses).
+// Returns 0 on success, -1 if CR is still set after MAX_SPINS polls.
+static int start_command(hba_port* port)
+{
+    uint32_t polls = 0;
+    while ((port->command_status & PORT_CMD_CR) && polls < MAX_SPINS)
+        ++polls;
+    if (polls == MAX_SPINS)
+        return -1;
+
+    const uint32_t with_fre = port->command_status | PORT_CMD_FRE;
+    port->command_status = with_fre;
+
+    const uint32_t with_st = port->command_status | PORT_CMD_ST;
+    port->command_status = with_st;
+
+    return 0;
+}
+
+// Address of the register block of port `i`: the blocks start 0x100 bytes
+// after the start of `ghc` and are 0x80 bytes apart.
+static inline hba_port* port_ptr(hba_ghc* ghc, int i)
+{
+    auto* base = (char*)ghc;
+    auto* first_port = base + 0x100;
+    return (hba_port*)(first_port + i * 0x80);
+}
+
+// Fixed-size ring of uint8_t values; `start` and `end` grow without bound and
+// are reduced modulo N on every access. There is no overflow or underflow
+// check in push() / pop().
+// In the visible code it is only referenced from commented-out lines.
+// NOTE(review): the constructor pre-fills arr with 0..N-1 but leaves
+// start == end, so empty() is true right after construction — confirm whether
+// `end` was meant to start at N.
+template <std::size_t N>
+struct quick_queue {
+    std::size_t start { };
+    std::size_t end { };
+    uint8_t arr[N] { };
+
+    // Fill the ring with the values 0, 1, ..., N-1.
+    quick_queue()
+    {
+        for (std::size_t i = 0; i < N; ++i)
+            arr[i] = i;
+    }
+
+    bool empty() { return start == end; }
+    void push(uint8_t val) { arr[end++ % N] = val; }
+    uint8_t pop() { return arr[start++ % N]; }
+};
+
+// One AHCI port with a device attached.
+//
+// Owns a single raw page that holds the command list (offset 0) and the
+// received FIS area (offset FIS_OFFSET). Commands are issued in slot 0 and
+// completed by polling; only reads are implemented.
+struct ahci_port {
+private:
+    // byte offset of the received FIS area inside `page`; the command list
+    // (32 headers of 32 bytes) occupies the bytes below it
+    static constexpr uint32_t FIS_OFFSET = 0x400;
+    // layout of a command table page: command FIS and PRDT live in the first
+    // DATA_OFFSET bytes, the DMA data buffer takes the rest of the 4K page
+    static constexpr uint32_t DATA_OFFSET = 512;
+    static constexpr uint32_t MAX_TRANSFER = 4096 - DATA_OFFSET;
+
+    // quick_queue<32> qu;
+    page_t page;
+    hba_port* port;
+    command_header* cmd_header { };
+    received_fis* fis { };
+    std::size_t sectors { -1U };
+
+    // Issue one ATA command in slot 0 and wait for completion by polling.
+    //
+    // buf:   receives `count` bytes copied out of the DMA buffer
+    // lba:   starting sector (low 48 bits are used)
+    // count: transfer size in bytes; a non-zero multiple of 512 that is not
+    //        larger than MAX_TRANSFER
+    // cmd:   ATA command code
+    // write: value of the command header's write bit
+    //
+    // Returns 0 on success, -1 on an invalid size or a timeout.
+    int send_command(char* buf, uint64_t lba, uint32_t count, uint8_t cmd, bool write)
+    {
+        // count must be a non-zero multiple of 512 and must fit into the data
+        // area that follows the command table inside one page
+        if (count == 0 || (count & (512 - 1)) || count > MAX_TRANSFER)
+            return -1;
+
+        // TODO: get an available command slot
+        int n = 0;
+        // auto n = qu.pop();
+
+        // for now, we read 3.5KB at most at a time
+        // command fis and prdt will take up the lower 128+Bytes
+        auto cmdtable_page = __alloc_raw_page();
+
+        // construct command header
+        memset(cmd_header + n, 0x00, sizeof(command_header));
+        cmd_header[n].command_fis_length = 5;
+        cmd_header[n].clear_busy_upon_ok = 1;
+
+        cmd_header[n].write = write;
+        cmd_header[n].prdt_length = 1;
+        cmd_header[n].command_table_base = cmdtable_page << 12;
+
+        auto* cmdtable = (command_table*)kernel::pmap(cmdtable_page);
+        memset(cmdtable, 0x00, sizeof(command_table) + sizeof(prdt_entry));
+
+        // first, set up command fis
+        cmdtable->command_fis.fis_type = FIS_REG_H2D;
+        cmdtable->command_fis.is_command = 1;
+        cmdtable->command_fis.command = cmd;
+
+        cmdtable->command_fis.lba0 = lba & 0xff;
+        cmdtable->command_fis.lba1 = (lba >> 8) & 0xff;
+        cmdtable->command_fis.lba2 = (lba >> 16) & 0xff;
+        cmdtable->command_fis.device = 1 << 6; // lba mode
+        cmdtable->command_fis.lba3 = (lba >> 24) & 0xff;
+        cmdtable->command_fis.lba4 = (lba >> 32) & 0xff;
+        cmdtable->command_fis.lba5 = (lba >> 40) & 0xff;
+
+        cmdtable->command_fis.count = count >> 9;
+
+        // fill in prdt
+        auto* pprdt = cmdtable->prdt;
+        pprdt->data_base = (cmdtable_page << 12) + DATA_OFFSET;
+        // the byte count field is zero-based: it holds (length - 1)
+        pprdt->byte_count = count - 1;
+        pprdt->interrupt = 1;
+
+        // clear the received fis
+        memset(fis, 0x00, sizeof(received_fis));
+
+        // issue the command
+        port->command_issue = 1 << n;
+
+        // TODO: use interrupt
+        // NOTE(review): on timeout the command table page is neither unmapped
+        // nor freed. The HBA may still be transferring into it, so releasing
+        // it here would be unsafe without stopping the port first.
+        uint32_t spins = 0;
+        SPIN(port->task_file_data & (ATA_DEV_BSY | ATA_DEV_DRQ), spins)
+            return -1;
+
+        SPIN(port->command_issue & (1 << n), spins)
+            return -1;
+
+        memcpy(buf, (char*)cmdtable + DATA_OFFSET, count);
+
+        kernel::pfree(cmdtable_page);
+        __free_raw_page(cmdtable_page);
+        return 0;
+    }
+
+    // Send IDENTIFY DEVICE (0xEC) and discard the data; used as a liveness
+    // check. Returns 0 on success, -1 on failure.
+    int identify()
+    {
+        char buf[512];
+        int ret = send_command(buf, 0, 512, 0xEC, false);
+        if (ret != 0)
+            return -1;
+        return 0;
+    }
+
+public:
+    explicit ahci_port(hba_port* port)
+        : page(__alloc_raw_page()), port(port) { }
+
+    // the object owns `page`, so copying it would free the page twice
+    ahci_port(const ahci_port&) = delete;
+    ahci_port& operator=(const ahci_port&) = delete;
+
+    ~ahci_port()
+    {
+        // the page is mapped only once init() got far enough to set
+        // cmd_header, but it is allocated in the constructor and therefore
+        // always has to be given back
+        if (cmd_header)
+            kernel::pfree(page);
+        __free_raw_page(page);
+    }
+
+    // Read up to `cnt` bytes starting at byte `offset` of the disk into `buf`.
+    //
+    // At most `buf_size` bytes are written. The disk is read one 512-byte
+    // sector at a time through a bounce buffer, so `offset` and `cnt` do not
+    // have to be sector aligned.
+    //
+    // Returns the number of bytes copied, or -EIO if a sector read failed.
+    ssize_t read(char* buf, std::size_t buf_size, std::size_t offset, std::size_t cnt)
+    {
+        cnt = std::min(buf_size, cnt);
+
+        char b[512] {};
+        char* orig_buf = buf;
+        size_t start = offset / 512;
+        size_t end = std::min((offset + cnt + 511) / 512, sectors);
+
+        offset %= 512;
+        for (size_t i = start; i < end; ++i) {
+            // 0xC8: READ DMA
+            int status = send_command(b, i, 512, 0xC8, false);
+            if (status != 0)
+                return -EIO;
+
+            // copy what is left of this sector, but never more than the
+            // caller still wants; `offset` is non-zero for the first sector
+            // only
+            size_t to_copy = std::min(cnt, 512 - offset);
+            memcpy(buf, b + offset, to_copy);
+            offset = 0;
+            buf += to_copy;
+            cnt -= to_copy;
+        }
+        return buf - orig_buf;
+    }
+
+    // Stop the port, point it at our command list / received FIS page,
+    // restart it and probe the device with IDENTIFY.
+    // Returns 0 on success, -1 on failure.
+    int init()
+    {
+        if (stop_command(port) != 0)
+            return -1;
+        port->interrupt_status = ~0;
+        // TODO: use interrupt
+        // port->interrupt_enable = 1;
+
+        port->command_list_base = page << 12;
+        port->command_list_base_upper = 0;
+
+        port->fis_base = (page << 12) + FIS_OFFSET;
+        port->fis_base_upper = 0;
+
+        cmd_header = (command_header*)kernel::pmap(page, false);
+        // must match the offset programmed into fis_base above
+        fis = (received_fis*)((char*)cmd_header + FIS_OFFSET);
+
+        if (start_command(port) != 0)
+            return -1;
+
+        if (identify() != 0)
+            return -1;
+
+        return 0;
+    }
+};
+
+// Kernel module for the AHCI controller: registers a PCI driver for the
+// supported device and, once it is found, sets up every port that has a
+// device attached and exposes it as a block device.
+class ahci_module : public virtual kernel::module::module {
+private:
+    hba_ghc* ghc { };
+    pci_device* dev { };
+    // indexed by port number; entries stay null for unused ports
+    std::vector<ahci_port*> ports;
+
+public:
+    ahci_module() : module("ahci") { }
+    ~ahci_module()
+    {
+        // TODO: release PCI device
+        if (ghc)
+            kernel::pfree(dev->reg[PCI_REG_ABAR] >> 12);
+
+        for (auto& item : ports) {
+            if (!item)
+                continue;
+
+            delete item;
+            item = nullptr;
+        }
+    }
+
+    // Walk the "ports implemented" bitmap and bring up every port whose SATA
+    // status reports an established link (low nibble == 3).
+    // Ports that fail to initialise are logged and skipped. Always returns 0.
+    int probe_disks()
+    {
+        // Must be unsigned: with a signed int, a set bit 31 would turn `>>=`
+        // into an arithmetic shift that never reaches zero, so the loop would
+        // not terminate and `n` would run past the 32 entries of `ports`.
+        uint32_t implemented = this->ghc->ports_implemented;
+        for (int n = 0; implemented; implemented >>= 1, ++n) {
+            if (!(implemented & 1))
+                continue;
+
+            auto* ghc_port = port_ptr(this->ghc, n);
+            if ((ghc_port->sata_status & 0x0f) != 0x03)
+                continue;
+
+            auto* port = new ahci_port(ghc_port);
+            if (port->init() != 0) {
+                delete port;
+                kmsg("An error occurred while configuring an ahci port\n");
+                continue;
+            }
+
+            this->ports[n] = port;
+
+            // major 8, minors in steps of 8 per disk; no write handler yet
+            fs::register_block_device(fs::make_node(8, n * 8), {
+                [port](char* buf, std::size_t buf_size, std::size_t offset, std::size_t cnt) {
+                    return port->read(buf, buf_size, offset, cnt);
+                }, nullptr
+            });
+
+            fs::partprobe();
+        }
+
+        return 0;
+    }
+
+    // Register the PCI driver callback. The callback maps the controller's
+    // register block, sets bit 1 of the global host control register and
+    // probes the disks.
+    // Returns MODULE_SUCCESS, or MODULE_FAILED if registration failed.
+    virtual int init() override
+    {
+        // one slot per possible port, so probe_disks() can index by port number
+        ports.resize(32);
+
+        auto ret = kernel::hw::pci::register_driver(VENDOR_INTEL, DEVICE_AHCI,
+            [this](pci_device* dev) -> int {
+                this->dev = dev;
+                uint32_t abar_address = dev->reg[PCI_REG_ABAR];
+
+                void* base = kernel::pmap(abar_address >> 12, false);
+                this->ghc = (hba_ghc*)base;
+
+                this->ghc->global_host_control =
+                    this->ghc->global_host_control | 2; // set interrupt enable
+
+                return this->probe_disks();
+        });
+
+        if (ret != 0)
+            return MODULE_FAILED;
+        return MODULE_SUCCESS;
+    }
+};
+
+} // namespace ahci
+
+// Loader for the AHCI module: allocates a new ahci_module and returns it; the
+// caller takes over the pointer. INTERNAL_MODULE places the loader's address
+// in the ".kmods" section.
+kernel::module::module* ahci_module_init()
+{ return new ahci::ahci_module(); }
+INTERNAL_MODULE(ahci_module_loader, ahci_module_init);

+ 0 - 235
src/kernel/hw/ata.cpp

@@ -1,235 +0,0 @@
-#include <asm/port_io.h>
-#include <assert.h>
-#include <fs/fat.hpp>
-#include <kernel/hw/ata.hpp>
-#include <kernel/syscall.hpp>
-#include <kernel/vfs.hpp>
-#include <stdint.h>
-#include <stdio.h>
-#include <types/allocator.hpp>
-#include <types/status.h>
-
-hw::ata::ata(port_id_t p)
-    : data(p)
-    , error(p + 1)
-    , feats(p + 1)
-    , count(p + 2)
-    , lbalo(p + 3)
-    , lbami(p + 4)
-    , lbahi(p + 5)
-    , drive(p + 6)
-    , stats(p + 7)
-    , comms(p + 7)
-    , slave_flag(0x00)
-{
-}
-
-hw::ata::stat_t hw::ata::status(void) const
-{
-    return hw::ata::stat_t { *stats };
-}
-
-bool hw::ata::identify(void) const
-{
-    char buf[512] {};
-
-    drive = 0xa0 | slave_flag;
-    count = 0;
-    lbalo = 0;
-    lbami = 0;
-    lbahi = 0;
-    comms = 0xec;
-
-    stat_t stat {};
-    while ((stat = status()).in.bsy)
-        ;
-
-    if (stat.in.err)
-        return false;
-
-    read_data(buf, 512);
-
-    if (!status().in.rdy)
-        return false;
-
-    return true;
-}
-
-int hw::ata::select(bool master)
-{
-    if (master)
-        slave_flag = 0x00;
-    else
-        slave_flag = 0x10;
-
-    drive = 0xe0 | slave_flag;
-    return GB_OK;
-}
-
-size_t hw::ata::read_data(char* buf, size_t n) const
-{
-    size_t orig_n = n;
-    n /= 2;
-    while (status().in.drq && n--) {
-        *(uint16_t*)buf = *data;
-        buf += sizeof(uint16_t);
-    }
-    return orig_n - n * 2;
-}
-
-size_t hw::ata::write_data(const char* buf, size_t n) const
-{
-    size_t orig_n = n;
-    n /= 2;
-    while (status().in.drq && n--) {
-        data = *(uint16_t*)buf;
-        buf += sizeof(uint16_t);
-    }
-    return orig_n - n * 2;
-}
-
-int hw::ata::read_sector(char* buf, uint32_t lba_low, uint16_t lba_high) const
-{
-    count = 0x00; // HIGH BYTE
-    lbalo = (lba_low >> 24) & 0xff;
-    lbami = lba_high & 0xff;
-    lbahi = (lba_high >> 8) & 0xff;
-    count = 0x01; // LOW BYTE
-    lbalo = lba_low & 0xff;
-    lbami = (lba_low >> 8) & 0xff;
-    lbahi = (lba_low >> 16) & 0xff;
-    comms = 0x24; // READ SECTORS EXT
-
-    while (status().in.bsy)
-        ;
-    if (status().in.drq)
-        read_data(buf, 512);
-    return GB_OK;
-}
-
-int hw::ata::write_sector(const char* buf, uint32_t lba_low, uint16_t lba_high) const
-{
-    count = 0x00; // HIGH BYTE
-    lbalo = (lba_low >> 24) & 0xff;
-    lbami = lba_high & 0xff;
-    lbahi = (lba_high >> 8) & 0xff;
-    count = 0x01; // LOW BYTE
-    lbalo = lba_low & 0xff;
-    lbami = (lba_low >> 8) & 0xff;
-    lbahi = (lba_low >> 16) & 0xff;
-    comms = 0x24; // READ SECTORS EXT
-
-    while (status().in.bsy)
-        ;
-    if (status().in.drq)
-        write_data(buf, 512);
-    return GB_OK;
-}
-
-static hw::ata* ata_pri;
-static hw::ata* ata_sec;
-constexpr hw::ata** p_ata_pri = &ata_pri;
-constexpr hw::ata** p_ata_sec = &ata_sec;
-
-// data1: offset sectors
-// data2: limit sectors
-template <hw::ata** ata_bus>
-size_t _ata_read(fs::special_node* sn, char* buf, size_t buf_size, size_t offset, size_t n)
-{
-    assert(buf_size >= n);
-
-    char b[512] {};
-    char* orig_buf = buf;
-    size_t start = sn->data1 + offset / 512;
-    size_t end = sn->data1 + (offset + n + 511) / 512;
-    if (end > sn->data1 + sn->data2)
-        end = sn->data1 + sn->data2;
-    offset %= 512;
-    for (size_t i = start; i < end; ++i) {
-        (void)(*ata_bus)->read_sector(b, i, 0);
-        size_t to_copy = 0;
-        if (offset)
-            to_copy = 512 - offset;
-        else
-            to_copy = n > 512 ? 512 : n;
-        memcpy(buf, b + offset, to_copy);
-        offset = 0;
-        buf += to_copy;
-        n -= to_copy;
-    }
-    return buf - orig_buf;
-}
-
-struct PACKED mbr_part_entry {
-    uint8_t attr;
-    uint8_t chs_start[3];
-    uint8_t type;
-    uint8_t chs_end[3];
-    uint32_t lba_start;
-    uint32_t cnt;
-};
-
-struct PACKED mbr {
-    uint8_t code[440];
-    uint32_t signature;
-    uint16_t reserved;
-    struct mbr_part_entry parts[4];
-    uint16_t magic;
-};
-
-static inline void mbr_part_probe(fs::inode* drive, uint16_t major, uint16_t minor)
-{
-    struct mbr hda_mbr {
-    };
-    // TODO: devtmpfs
-    auto* dev = fs::vfs_open(*fs::fs_root, "/dev");
-    assert(dev);
-
-    fs::vfs_read(drive, (char*)&hda_mbr, 512, 0, 512);
-
-    for (const auto& part : hda_mbr.parts) {
-        if (!part.type)
-            continue;
-
-        fs::register_special_block(major, minor++,
-            _ata_read<p_ata_pri>,
-            nullptr,
-            part.lba_start, part.cnt);
-
-        fs::vfs_mknode(dev, "hda1", { .in { .major = 2, .minor = 1 } });
-    }
-}
-
-// data: void (*func_to_call_next)(void)
-void hw::init_ata(void)
-{
-    ata_pri = new hw::ata(ATA_PRIMARY_BUS_BASE);
-    if (ata_pri->identify())
-        ata_pri->select(true);
-
-    ata_sec = new hw::ata(ATA_SECONDARY_BUS_BASE);
-    if (ata_sec->identify())
-        ata_sec->select(true);
-
-    // data1: offset sectors
-    // data2: limit sectors
-    fs::register_special_block(
-        2, 0,
-        _ata_read<p_ata_pri>,
-        nullptr,
-        0,
-        0xffffffff);
-
-    // data1: offset sectors
-    // data2: limit sectors
-    fs::register_special_block(
-        2, 8,
-        _ata_read<p_ata_sec>,
-        nullptr,
-        0,
-        0xffffffff);
-
-    auto* hda = fs::vfs_open(*fs::fs_root, "/dev/hda");
-    assert(hda);
-    mbr_part_probe(hda->ind, 2, 1);
-}

+ 143 - 0
src/kernel/hw/pci.cc

@@ -0,0 +1,143 @@
+#include <kernel/hw/pci.hpp>
+#include <kernel/hw/port.hpp>
+#include <kernel/errno.h>
+
+#include <map>
+
+#include <assert.h>
+#include <stdint.h>
+
+using kernel::hw::p32;
+
+constexpr p32 paddr(0xCF8);
+constexpr p32 pdata(0xCFC);
+
+using device_no = uint32_t;
+// Pack a vendor/device id pair into a single lookup key: the vendor id is
+// shifted into bits 31..16 and the device id is OR-ed into the low bits.
+constexpr device_no make_device(uint32_t vendor, uint32_t device)
+{
+    const uint32_t vendor_field = vendor << 16;
+    return vendor_field | device;
+}
+
+namespace kernel::hw::pci {
+
+// keys are built by make_device(): higher 16 bits are vendor id, lower 16 bits are device id
+std::map<device_no, pci_device>* pci_devices_p;
+std::map<device_no, driver_t>* pci_drivers_p;
+
+// Accessor for the registry of probed PCI devices (keyed by make_device()).
+// The map is allocated on first use; later calls return the same object.
+std::map<device_no, pci_device>& pci_devices()
+{
+    using registry_t = std::map<device_no, pci_device>;
+
+    if (pci_devices_p) [[likely]]
+        return *pci_devices_p;
+
+    pci_devices_p = new registry_t();
+    return *pci_devices_p;
+}
+
+// Accessor for the registry of PCI drivers (keyed by make_device()).
+// The map is allocated on first use; later calls return the same object.
+std::map<device_no, driver_t>& pci_drivers()
+{
+    using registry_t = std::map<device_no, driver_t>;
+
+    if (pci_drivers_p) [[likely]]
+        return *pci_drivers_p;
+
+    pci_drivers_p = new registry_t();
+    return *pci_drivers_p;
+}
+
+// class config_reg
+
+// Read the 32-bit configuration dword that contains byte `offset`.
+// The address (addr_base combined with the dword-aligned offset) is written
+// through the 0xCF8 port object, then the value is fetched via the 0xCFC one.
+uint32_t config_reg::read32(uint32_t offset) const
+{
+    const uint32_t dword_offset = offset & 0xFC;
+    paddr = (addr_base | dword_offset);
+    const uint32_t value = *pdata;
+    return value;
+}
+
+// Read a 16-bit configuration value: fetch the containing dword and take its
+// upper half when bit 1 of `offset` is set, its lower half otherwise.
+uint16_t config_reg::read16(uint16_t offset) const
+{
+    const uint32_t dword = read32(offset);
+    const unsigned shift = (offset & 2) ? 16 : 0;
+    return (dword >> shift) & 0xFFFF;
+}
+
+// Index the configuration space by dword: reg[n] reads byte offset n * 4.
+uint32_t config_reg::operator[](uint32_t n) const
+{
+    const uint32_t byte_offset = n << 2;
+    return read32(byte_offset);
+}
+
+// end class config_reg
+
+// class pci_device
+
+// Cache the identification fields of a function from its configuration space.
+pci_device::pci_device(config_reg reg)
+    : reg(reg)
+{
+    // dword 0: vendor id in the low half, device id in the high half
+    const uint32_t id = reg[0];
+    vendor = id & 0xFFFF;
+    device = id >> 16;
+
+    // dword 2: revision id in bits 7..0, subclass in 23..16, class in 31..24
+    const uint32_t class_info = reg[2];
+    revision_id = class_info & 0xFF;
+    subclass = (class_info >> 16) & 0xFF;
+    class_code = class_info >> 24;
+
+    // dword 3: header type in bits 23..16
+    const uint32_t misc = reg[3];
+    header_type = (misc >> 16) & 0xFF;
+}
+
+// end class pci_device
+
+// Probe one bus/device/function slot.
+// Returns nullptr when the slot reports vendor id 0xFFFF; otherwise records
+// the function in pci_devices() and returns a pointer to the stored entry.
+// NOTE(review): entries are keyed by vendor/device id only, so a second
+// function reporting the same ids trips the assert below -- confirm intended.
+pci_device* probe_device(uint8_t bus, uint8_t dev, uint8_t func)
+{
+    config_reg reg(bus, dev, func);
+
+    const uint32_t id = reg[0];
+    const uint16_t vendor = id & 0xFFFF;
+    if (vendor == 0xFFFF)
+        return nullptr;
+
+    const uint16_t device = id >> 16;
+    auto [ slot, is_new ] = hw::pci::pci_devices().emplace(
+        make_device(vendor, device), reg);
+    assert(is_new);
+
+    return &slot->second;
+}
+
+// Register `drv` for the given vendor/device id pair.
+// Returns -EEXIST when a driver is already registered for that pair and 0
+// otherwise. If a matching device has already been probed, the driver is
+// called on it immediately; nothing is done with what that call returns.
+int register_driver(uint16_t vendor, uint16_t device, driver_t drv)
+{
+    const device_no key = make_device(vendor, device);
+
+    auto& drivers = pci_drivers();
+    if (drivers.find(key) != drivers.end())
+        return -EEXIST;
+
+    auto [ _, inserted ] = drivers.emplace(key, drv);
+    assert(inserted);
+
+    // TODO: check status or print log
+    auto& devices = pci_devices();
+    if (auto match = devices.find(key); match != devices.end())
+        drv(&match->second);
+
+    return 0;
+}
+
+} // namespace kernel::hw::pci
+
+namespace kernel::kinit {
+
+// Enumerate every bus/device/function combination and record the functions
+// that respond (probe_device() stores them in the device registry).
+SECTION(".text.kinit")
+void init_pci()
+{
+    for (int bus = 0; bus < 256; ++bus) {
+        for (int dev = 0; dev < 32; ++dev) {
+            for (int func = 0; func < 8; ++func) {
+                auto* pcidev = hw::pci::probe_device(bus, dev, func);
+                if (!pcidev) {
+                    // a slot without function 0 holds no device at all
+                    if (func == 0)
+                        break;
+
+                    // function numbers of a multi-function device need not
+                    // be contiguous, so keep looking past a missing one
+                    continue;
+                }
+                // TODO: call driver if exists
+            }
+        }
+    }
+}
+
+} // namespace kernel::kinit

+ 3 - 3
src/kernel/hw/timer.c

@@ -10,9 +10,9 @@ void init_pit(void)
     asm_outb(PORT_PIT_CONTROL, 0x34);
 
     // send interval number
-    // 0x2e9c = 11932 = 100Hz
-    asm_outb(PORT_PIT_COUNT, 0x9c);
-    asm_outb(PORT_PIT_COUNT, 0x2e);
+    // 0x04a9 = 1193 = 1000Hz
+    asm_outb(PORT_PIT_COUNT, 0xa9);
+    asm_outb(PORT_PIT_COUNT, 0x04);
 }
 
 void inc_tick(void)

+ 18 - 7
src/kernel/mem.cpp

@@ -472,7 +472,7 @@ int mmap(
 {
     auto& mms = current_process->mms;
 
-    if (!S_ISREG(file->mode) && !S_ISBLK(file->mode)) [[unlikely]] {
+    if (!file && !S_ISREG(file->mode) && !S_ISBLK(file->mode)) [[unlikely]] {
         errno = EINVAL;
         return GB_FAILED;
     }
@@ -489,11 +489,19 @@ int mmap(
         return GB_FAILED;
     }
 
-    auto& mm = mms.add_empty_area(hint, n_pgs, PAGE_MMAP | PAGE_COW, write, priv);
+    if (file) {
+        auto& mm = mms.add_empty_area(hint, n_pgs, PAGE_MMAP | PAGE_COW, write, priv);
 
-    mm.attr.mapped = 1;
-    mm.mapped_file = file;
-    mm.file_offset = offset;
+        mm.attr.mapped = 1;
+        mm.mapped_file = file;
+        mm.file_offset = offset;
+    }
+    else {
+        // private mapping of zero-filled pages
+        auto& mm = mms.add_empty_area(hint, n_pgs, PAGE_COW, write, priv);
+
+        mm.attr.mapped = 0;
+    }
 
     return GB_OK;
 }
@@ -553,7 +561,7 @@ static types::bitmap freebm(
     [](unsigned char*, std::size_t){}, _freebm, 0x400);
 } // namespace __physmapper
 
-void* kernel::pmap(page_t pg)
+void* kernel::pmap(page_t pg, bool cached)
 {
     auto* const pmap_pt = std::bit_cast<pte_t*>(0xff001000);
     auto* const mapped_start = std::bit_cast<void*>(0xff000000);
@@ -568,7 +576,10 @@ void* kernel::pmap(page_t pg)
     for (int i = 2; i < 0x400; ++i) {
         if (__physmapper::freebm.test(i) == 0) {
             auto* pte = pmap_pt + i;
-            pte->v = 0x3;
+            if (cached)
+                pte->v = 0x3;
+            else
+                pte->v = 0x13;
             pte->in.page = pg;
 
             void* ptr = vptradd(mapped_start, 0x1000 * i);

+ 18 - 0
src/kernel/module.cc

@@ -0,0 +1,18 @@
+#include <kernel/module.hpp>
+
+namespace kernel::module {
+
+// Construct a module, initialising its `name` member from the argument.
+module::module(const char* name)
+    : name(name)
+{
+}
+
+// Initialise `mod` by calling its init().
+// When init() reports MODULE_FAILED the module is deleted and MODULE_FAILED
+// is returned, so callers must not touch `mod` after a failure. Any other
+// result is treated as success.
+int insmod(module* mod) {
+    if (mod->init() != MODULE_FAILED)
+        return MODULE_SUCCESS;
+
+    delete mod;
+    return MODULE_FAILED;
+}
+
+} // namespace kernel::module

+ 49 - 28
src/kernel/process.cpp

@@ -7,11 +7,11 @@
 #include <asm/sys.h>
 #include <assert.h>
 #include <fs/fat.hpp>
-#include <kernel/hw/ata.hpp>
 #include <kernel/interrupt.h>
 #include <kernel/log.hpp>
 #include <kernel/mem.h>
 #include <kernel/mm.hpp>
+#include <kernel/module.hpp>
 #include <kernel/process.hpp>
 #include <kernel/signal.hpp>
 #include <kernel/vfs.hpp>
@@ -257,7 +257,27 @@ void kernel_threadd_main(void)
     }
 }
 
-void NORETURN _kernel_init(void)
+// Drop the early boot mapping and give the boot-only memory back:
+// clears entry 0 of the early kernel page directory, frees raw page 2 and
+// then every page in [__stage1_start, __kinit_end).
+static void release_kinit()
+{
+    extern char __stage1_start[];
+    extern char __kinit_end[];
+
+    kernel::paccess pa(EARLY_KERNEL_PD_PAGE);
+    auto pd = (pd_t)pa.ptr();
+    assert(pd);
+    (*pd)[0].v = 0;
+
+    // free pt#0
+    __free_raw_page(0x00002);
+
+    // free .stage1 and .kinit
+    const uint32_t first_page = (uint32_t)__stage1_start >> 12;
+    const uint32_t end_page = (uint32_t)__kinit_end >> 12;
+    for (uint32_t page = first_page; page < end_page; ++page)
+        __free_raw_page(page);
+}
+
+static void create_kthreadd_process()
 {
     // pid 2 is kernel thread daemon
     auto& proc = procs->emplace(1);
@@ -285,15 +305,38 @@ void NORETURN _kernel_init(void)
     push_stack(esp, 0x200);
 
     readythds->push(&thd);
+}
 
-    // ------------------------------------------
+void NORETURN _kernel_init(void)
+{
+    create_kthreadd_process();
+
+    release_kinit();
 
     asm_sti();
 
-    hw::init_ata();
+    // ------------------------------------------
+    // interrupt enabled
+    // ------------------------------------------
+
+    // load kmods
+    for (auto loader = kernel::module::kmod_loaders_start; *loader; ++loader) {
+        auto* mod = (*loader)();
+        if (!mod)
+            continue;
+
+        auto ret = insmod(mod);
+        if (ret == kernel::module::MODULE_SUCCESS)
+            continue;
+
+        char buf[256];
+        snprintf(buf, sizeof(buf),
+            "[kernel] An error occured while loading \"%s\"\n", mod->name);
+        kmsg(buf);
+    }
 
     // TODO: parse kernel parameters
-    auto* drive = fs::vfs_open(*fs::fs_root, "/dev/hda1");
+    auto* drive = fs::vfs_open(*fs::fs_root, "/dev/sda1");
     assert(drive);
     auto* _new_fs = fs::register_fs(new fs::fat::fat32(drive->ind));
     auto* mnt = fs::vfs_open(*fs::fs_root, "/mnt");
@@ -349,27 +392,9 @@ void k_new_thread(void (*func)(void*), void* data)
     kthreadd_new_thd_data = data;
 }
 
+SECTION(".text.kinit")
 void NORETURN init_scheduler(void)
 {
-    {
-        extern char __stage1_start[];
-        extern char __kinit_end[];
-
-        kernel::paccess pa(EARLY_KERNEL_PD_PAGE);
-        auto pd = (pd_t)pa.ptr();
-        assert(pd);
-        (*pd)[0].v = 0;
-
-        // free pt#0
-        __free_raw_page(0x00002);
-
-        // free .stage1 and .kinit
-        for (uint32_t i = ((uint32_t)__stage1_start >> 12);
-             i < ((uint32_t)__kinit_end >> 12); ++i) {
-            __free_raw_page(i);
-        }
-    }
-
     procs = new proclist;
     readythds = new readyqueue;
 
@@ -385,10 +410,6 @@ void NORETURN init_scheduler(void)
     init.files.open(init, "/dev/console", O_WRONLY, 0);
     init.files.open(init, "/dev/console", O_WRONLY, 0);
 
-    // we need interrupts enabled for cow mapping so now we disable it
-    // in case timer interrupt mess things up
-    asm_cli();
-
     current_process = &init;
     current_thread = &thd;
     readythds->push(current_thread);

+ 220 - 68
src/kernel/vfs.cpp

@@ -1,3 +1,4 @@
+#include <cstddef>
 #include <map>
 #include <vector>
 #include <bit>
@@ -7,6 +8,7 @@
 
 #include <assert.h>
 #include <kernel/errno.h>
+#include <kernel/log.hpp>
 #include <kernel/mem.h>
 #include <kernel/process.hpp>
 #include <kernel/tty.hpp>
@@ -182,7 +184,7 @@ size_t fs::vfs::inode_write(inode*, const char*, size_t, size_t)
 { return -EINVAL; }
 int fs::vfs::inode_mkfile(dentry*, const char*, mode_t)
 { return -EINVAL; }
-int fs::vfs::inode_mknode(dentry*, const char*, node_t)
+int fs::vfs::inode_mknode(dentry*, const char*, mode_t, node_t)
 { return -EINVAL; }
 int fs::vfs::inode_rmfile(dentry*, const char*)
 { return -EINVAL; }
@@ -306,9 +308,12 @@ public:
         return GB_OK;
     }
 
-    virtual int inode_mknode(dentry* dir, const char* filename, fs::node_t sn) override
+    virtual int inode_mknode(dentry* dir, const char* filename, mode_t mode, fs::node_t sn) override
     {
-        auto& node = *cache_inode(0, _savedata(sn.v), S_IFBLK | 0777, 0, 0);
+        if (!S_ISBLK(mode) && !S_ISCHR(mode))
+            return -EINVAL;
+
+        auto& node = *cache_inode(0, _savedata(sn), mode, 0, 0);
         mklink(dir->ind, &node, filename);
         dir->append(get_inode(node.ino), filename, true);
         return GB_OK;
@@ -380,10 +385,10 @@ public:
 
         if (mask & STATX_TYPE) {
             st->stx_mode |= ind->mode & S_IFMT;
-            if (S_ISBLK(mode)) {
-                fs::node_t nd { (uint32_t)as_val(_getdata(ind->ino)) };
-                st->stx_rdev_major = nd.in.major;
-                st->stx_rdev_minor = nd.in.minor;
+            if (S_ISBLK(mode) || S_ISCHR(mode)) {
+                auto nd = (fs::node_t)as_val(_getdata(ind->ino));
+                st->stx_rdev_major = NODE_MAJOR(nd);
+                st->stx_rdev_minor = NODE_MINOR(nd);
             }
             st->stx_mask |= STATX_TYPE;
         }
@@ -558,8 +563,8 @@ void fs::fifo_file::close(void)
     ppipe.reset();
 }
 
-// 8 * 8 for now
-static fs::special_node sns[8][8];
+static std::map<fs::node_t, fs::blkdev_ops> blkdevs;
+static std::map<fs::node_t, fs::chrdev_ops> chrdevs;
 
 size_t fs::vfs_read(fs::inode* file, char* buf, size_t buf_size, size_t offset, size_t n)
 {
@@ -568,26 +573,28 @@ size_t fs::vfs_read(fs::inode* file, char* buf, size_t buf_size, size_t offset,
         return -1U;
     }
 
-    if (S_ISBLK(file->mode)) {
-        uint32_t ret = file->fs->inode_getnode(file);
-        if (ret == SN_INVALID) {
-            errno = EINVAL;
-            return 0xffffffff;
-        }
-        fs::node_t sn {
-            .v = ret
-        };
-        auto* ptr = &sns[sn.in.major][sn.in.minor];
-        auto* ops = &ptr->ops;
-        if (ops && ops->read)
-            return ops->read(ptr, buf, buf_size, offset, n);
-        else {
-            errno = EINVAL;
-            return 0xffffffff;
-        }
-    } else {
+    if (S_ISREG(file->mode))
         return file->fs->inode_read(file, buf, buf_size, offset, n);
+
+    if (S_ISBLK(file->mode) || S_ISCHR(file->mode)) {
+        node_t sn = file->fs->inode_getnode(file);
+
+        ssize_t ret;
+        if (S_ISBLK(file->mode))
+            ret = block_device_read(sn, buf, buf_size, offset, n);
+        else
+            ret = char_device_read(sn, buf, buf_size, n);
+
+        if (ret < 0) {
+            errno = -ret;
+            return -1U;
+        }
+
+        return ret;
     }
+
+    errno = EINVAL;
+    return -1U;
 }
 size_t fs::vfs_write(fs::inode* file, const char* buf, size_t offset, size_t n)
 {
@@ -596,34 +603,36 @@ size_t fs::vfs_write(fs::inode* file, const char* buf, size_t offset, size_t n)
         return -1U;
     }
 
-    if (S_ISBLK(file->mode)) {
-        uint32_t ret = file->fs->inode_getnode(file);
-        if (ret == SN_INVALID) {
-            errno = EINVAL;
-            return 0xffffffff;
-        }
-        fs::node_t sn {
-            .v = ret
-        };
-        auto* ptr = &sns[sn.in.major][sn.in.minor];
-        auto* ops = &ptr->ops;
-        if (ops && ops->write)
-            return ops->write(ptr, buf, offset, n);
-        else {
-            errno = EINVAL;
-            return 0xffffffff;
-        }
-    } else {
+    if (S_ISREG(file->mode))
         return file->fs->inode_write(file, buf, offset, n);
+
+    if (S_ISBLK(file->mode) || S_ISCHR(file->mode)) {
+        node_t sn = file->fs->inode_getnode(file);
+
+        ssize_t ret;
+        if (S_ISBLK(file->mode))
+            ret = block_device_write(sn, buf, offset, n);
+        else
+            ret = char_device_write(sn, buf, n);
+
+        if (ret < 0) {
+            errno = -ret;
+            return -1U;
+        }
+
+        return ret;
     }
+
+    errno = EINVAL;
+    return -1U;
 }
 int fs::vfs_mkfile(fs::vfs::dentry* dir, const char* filename, mode_t mode)
 {
     return dir->ind->fs->inode_mkfile(dir, filename, mode);
 }
-int fs::vfs_mknode(fs::vfs::dentry* dir, const char* filename, fs::node_t sn)
+int fs::vfs_mknode(fs::vfs::dentry* dir, const char* filename, mode_t mode, fs::node_t sn)
 {
-    return dir->ind->fs->inode_mknode(dir, filename, sn);
+    return dir->ind->fs->inode_mknode(dir, filename, mode, sn);
 }
 int fs::vfs_rmfile(fs::vfs::dentry* dir, const char* filename)
 {
@@ -656,19 +665,163 @@ int fs::vfs_stat(fs::vfs::dentry* ent, statx* stat, unsigned int mask)
 
 static std::list<fs::vfs*>* fs_es;
 
-void fs::register_special_block(
-    uint16_t major,
-    uint16_t minor,
-    fs::special_node_read read,
-    fs::special_node_write write,
-    uint32_t data1,
-    uint32_t data2)
+// Register the callbacks for block device `node`.
+// Returns -EEXIST when the lookup finds the node already present, 0 otherwise.
+int fs::register_block_device(fs::node_t node, fs::blkdev_ops ops)
+{
+    if (auto existing = blkdevs.find(node); existing)
+        return -EEXIST;
+
+    blkdevs.emplace(node, std::move(ops));
+    return 0;
+}
+
+// Register the callbacks for character device `node`.
+// Returns -EEXIST when the lookup finds the node already present, 0 otherwise.
+int fs::register_char_device(fs::node_t node, fs::chrdev_ops ops)
+{
+    if (auto existing = chrdevs.find(node); existing)
+        return -EEXIST;
+
+    chrdevs.emplace(node, std::move(ops));
+    return 0;
+}
+
+// MBR partition table, used by partprobe()
+
+struct PACKED mbr_part_entry { // one 16-byte slot of the MBR partition table
+    uint8_t attr; // presumably the status/bootable flag -- TODO confirm; not read by the probing code
+    uint8_t chs_start[3]; // CHS start address (by name); not read by the probing code
+    uint8_t type; // partition type; 0 makes mbr_part_probe() skip the slot
+    uint8_t chs_end[3]; // CHS end address (by name); not read by the probing code
+    uint32_t lba_start; // first sector of the partition; multiplied by 512 to get a byte offset
+    uint32_t cnt; // presumably the sector count -- TODO confirm; not read by the probing code
+};
+
+struct PACKED mbr { // in-memory image of a disk's first sector: 440 + 4 + 2 + 4 * 16 + 2 = 512 bytes
+    uint8_t code[440]; // leading 440 bytes; not interpreted by the probing code
+    uint32_t signature;
+    uint16_t reserved;
+    mbr_part_entry parts[4]; // the four partition slots walked by mbr_part_probe()
+    uint16_t magic; // NOTE(review): never checked by the probing code -- confirm whether it should be
+};
+
+// Scan the MBR partition table of the disk `node` and expose its partitions.
+// Every slot with a non-zero type becomes block device node + k (k = 1, 2, ...)
+// whose callbacks forward to `node` with the partition's byte offset added,
+// plus a /dev entry named "sd<ch><k>". Returns quietly when /dev cannot be
+// opened or the first sector cannot be read.
+static inline void mbr_part_probe(fs::node_t node, char ch)
+{
+    // zero-initialised: if the read fills less than the whole buffer, the
+    // untouched slots read as type 0 (unused) instead of stack garbage
+    mbr buf_mbr {};
+    // TODO: devtmpfs
+    auto* dev = fs::vfs_open(*fs::fs_root, "/dev");
+    if (!dev)
+        return;
+
+    char label[] = "sda1";
+    label[2] = ch;
+    auto ret = fs::block_device_read(node, (char*)&buf_mbr, sizeof(mbr), 0, 512);
+    if (ret < 0) {
+        kmsg("[kernel] cannot read device for part probing.\n");
+        return;
+    }
+
+    int n = 1;
+    for (const auto& part : buf_mbr.parts) {
+        if (n >= 8)
+            break;
+
+        if (!part.type)
+            continue;
+
+        // byte offset of the partition on the disk
+        // NOTE(review): 32-bit multiplication, wraps once lba_start reaches
+        // 2^23 sectors -- confirm this limit is acceptable
+        std::size_t part_offset = part.lba_start * 512;
+
+        // TODO: add partition offset limit
+        int err = fs::register_block_device(node + n, {
+            [=](char* buf, size_t buf_size, size_t offset, size_t cnt) -> ssize_t {
+                return fs::block_device_read(node, buf, buf_size, offset + part_offset, cnt);
+            },
+            [=](const char* buf, size_t offset, size_t cnt) -> ssize_t {
+                return fs::block_device_write(node, buf, offset + part_offset, cnt);
+            }
+        });
+
+        // only create the /dev entry for a partition that was newly
+        // registered, so probing the same disk again adds no duplicates
+        if (err == 0 && fs::vfs_mknode(dev, label, 0660 | S_IFBLK, node + n) < 0)
+            kmsg("[kernel] cannot create device node for partition.\n");
+
+        ++n, ++label[3];
+    }
+}
+
+// Walk the registered block devices and, for each whole disk, make sure a
+// /dev/sd<letter> node exists and probe its partition table. Letters are
+// handed out in iteration order starting at 'a'.
+void fs::partprobe()
+{
+    auto* dev = fs::vfs_open(*fs::fs_root, "/dev");
+    if (!dev)
+        return;
+
+    char letter = 'a';
+    char name[] = "sd*";
+    types::string<> path = "/dev/sd*";
+    for (const auto& entry : blkdevs) {
+        const auto& disk = entry.first;
+
+        // only the devices whose minor number is a multiple of 8
+        // are considered as a disk instead of partitions
+        const bool is_whole_disk = (NODE_MINOR(disk) % 8 == 0);
+        if (!is_whole_disk)
+            continue;
+
+        // swap the trailing placeholder/previous letter for the current one
+        path.pop();
+        path += letter;
+        name[2] = letter;
+
+        if (!fs::vfs_open(*fs::fs_root, path.c_str()))
+            vfs_mknode(dev, name, 0660 | S_IFBLK, disk);
+
+        mbr_part_probe(disk, letter);
+
+        ++letter;
+    }
+}
+
+// Read from block device `node` through its registered read callback.
+// Returns -EINVAL for NODE_INVALID, an unregistered node or a missing read
+// callback; otherwise whatever the callback returns.
+ssize_t fs::block_device_read(fs::node_t node, char* buf, size_t buf_size, size_t offset, size_t n)
+{
+    if (node != fs::NODE_INVALID) {
+        auto entry = blkdevs.find(node);
+        if (entry && entry->second.read)
+            return entry->second.read(buf, buf_size, offset, n);
+    }
+
+    return -EINVAL;
+}
+
+// Write to block device `node` through its registered write callback.
+// Returns -EINVAL for NODE_INVALID, an unregistered node or a missing write
+// callback; otherwise whatever the callback returns.
+ssize_t fs::block_device_write(fs::node_t node, const char* buf, size_t offset, size_t n)
+{
+    if (node != fs::NODE_INVALID) {
+        auto entry = blkdevs.find(node);
+        if (entry && entry->second.write)
+            return entry->second.write(buf, offset, n);
+    }
+
+    return -EINVAL;
+}
+
+ssize_t fs::char_device_read(fs::node_t node, char* buf, size_t buf_size, size_t n)
 {
-    fs::special_node& sn = sns[major][minor];
-    sn.ops.read = read;
-    sn.ops.write = write;
-    sn.data1 = data1;
-    sn.data2 = data2;
+    if (node == fs::NODE_INVALID)
+        return -EINVAL;
+
+    auto iter = chrdevs.find(node);
+    if (!iter || !iter->second.read)
+        return -EINVAL;
+
+    return iter->second.read(buf, buf_size, n);
+}
+
+// Write to character device `node` through its registered write callback.
+// Returns -EINVAL for NODE_INVALID, an unregistered node or a missing write
+// callback; otherwise whatever the callback returns.
+ssize_t fs::char_device_write(fs::node_t node, const char* buf, size_t n)
+{
+    if (node == fs::NODE_INVALID)
+        return -EINVAL;
+
+    auto iter = chrdevs.find(node);
+    // guard the callback that is actually invoked below (write, not read)
+    if (!iter || !iter->second.write)
+        return -EINVAL;
+
+    return iter->second.write(buf, n);
+}
 
 fs::vfs* fs::register_fs(vfs* fs)
@@ -677,22 +830,22 @@ fs::vfs* fs::register_fs(vfs* fs)
     return fs;
 }
 
-size_t b_null_read(fs::special_node*, char* buf, size_t buf_size, size_t, size_t n)
+ssize_t b_null_read(char* buf, size_t buf_size, size_t n)
 {
     if (n >= buf_size)
         n = buf_size;
     memset(buf, 0x00, n);
     return n;
 }
-size_t b_null_write(fs::special_node*, const char*, size_t, size_t n)
+ssize_t b_null_write(const char*, size_t n)
 {
     return n;
 }
-static size_t console_read(fs::special_node*, char* buf, size_t buf_size, size_t, size_t n)
+static ssize_t console_read(char* buf, size_t buf_size, size_t n)
 {
     return console->read(buf, buf_size, n);
 }
-static size_t console_write(fs::special_node*, const char* buf, size_t, size_t n)
+static ssize_t console_write(const char* buf, size_t n)
 {
     size_t orig_n = n;
     while (n--)
@@ -797,10 +950,10 @@ void init_vfs(void)
 {
     using namespace fs;
     // null
-    register_special_block(0, 0, b_null_read, b_null_write, 0, 0);
+    register_char_device(make_node(1, 0), { b_null_read, b_null_write });
     // console (supports serial console only for now)
     // TODO: add interface to bind console device to other devices
-    register_special_block(1, 0, console_read, console_write, 0, 0);
+    register_char_device(make_node(2, 0), { console_read, console_write });
 
     fs_es = types::pnew<types::kernel_ident_allocator>(fs_es);
 
@@ -820,7 +973,6 @@ void init_vfs(void)
 
     auto* dev = vfs_open(*fs_root, "/dev");
     assert(dev);
-    vfs_mknode(dev, "null", { .in { .major = 0, .minor = 0 } });
-    vfs_mknode(dev, "console", { .in { .major = 1, .minor = 0 } });
-    vfs_mknode(dev, "hda", { .in { .major = 2, .minor = 0 } });
+    vfs_mknode(dev, "null", 0666 | S_IFCHR, make_node(1, 0));
+    vfs_mknode(dev, "console", 0666 | S_IFCHR, make_node(2, 0));
 }

+ 2 - 0
src/kinit.cpp

@@ -3,6 +3,7 @@
 #include <assert.h>
 #include <kernel/event/event.h>
 #include <kernel/hw/keyboard.h>
+#include <kernel/hw/pci.hpp>
 #include <kernel/hw/serial.h>
 #include <kernel/hw/timer.h>
 #include <kernel/interrupt.h>
@@ -112,6 +113,7 @@ extern "C" SECTION(".text.kinit") void NORETURN kernel_init(void)
     ret = init_console("ttyS0");
     assert(ret == GB_OK);
 
+    kernel::kinit::init_pci();
     init_vfs();
     init_syscall();