Forráskód Böngészése

feat(fs): add fat32 filesystem

greatbridf 2 éve
szülő
commit
3f508e8775
6 módosított fájl, 705 hozzáadás és 199 törlés
  1. 3 1
      CMakeLists.txt
  2. 162 0
      include/fs/fat.hpp
  3. 75 31
      include/kernel/vfs.hpp
  4. 190 0
      src/fs/fat.cpp
  5. 92 29
      src/kernel/hw/ata.cpp
  6. 183 138
      src/kernel/vfs.cpp

+ 3 - 1
CMakeLists.txt

@@ -46,7 +46,8 @@ endif()
 
 include_directories(${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/user-space-program/build)
 
-set(KERNEL_MAIN_SOURCES src/kernel_main.c
+set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
+                        src/kernel_main.c
                         src/kernel/errno.c
                         src/kernel/interrupt.cpp
                         src/kernel/process.cpp
@@ -66,6 +67,7 @@ set(KERNEL_MAIN_SOURCES src/kernel_main.c
                         include/asm/boot.h
                         include/asm/port_io.h
                         include/asm/sys.h
+                        include/fs/fat.hpp
                         include/kernel/event/event.h
                         include/kernel/errno.h
                         include/kernel/tty.h

+ 162 - 0
include/fs/fat.hpp

@@ -0,0 +1,162 @@
+#pragma once
+
+#include <kernel/mem.h>
+#include <kernel/vfs.hpp>
+#include <types/size.h>
+#include <types/stdint.h>
+
+namespace fs::fat {
+using cluster_t = uint32_t;
+
+// FAT boot sector fields shared with pre-FAT32 volumes (30 bytes if PACKED removes padding); value hints below are for FAT32
+struct PACKED old_boot_sector {
+    uint8_t jmp_instruction[3];
+    char oem_name[8];
+    // usually 512
+    uint16_t bytes_per_sector;
+    uint8_t sectors_per_cluster; // copied into fat32::sectors_per_cluster by the fat32 constructor
+    // 32 for FAT32
+    uint16_t reserved_sectors; // the fat32 constructor reads the first FAT starting at this sector
+    // usually 2
+    uint8_t fat_copies;
+    // 0 for FAT32
+    uint16_t root_directory_entries;
+    // valid before FAT32
+    uint16_t _sectors_cnt;
+    // 0xf8 for hard disk
+    uint8_t type;
+    // valid before FAT32
+    uint16_t _sectors_per_fat;
+    // 12
+    uint16_t sectors_per_track;
+    // 2
+    uint16_t heads;
+    // 0
+    uint16_t hidden_sectors;
+};
+
+// Complete FAT32 boot sector as read from sector 0 by the fat32 constructor (512 bytes if PACKED removes padding)
+struct PACKED ext_boot_sector {
+    struct old_boot_sector old;
+    // 0
+    uint16_t hidden_sector_ext;
+    uint32_t sectors_cnt;
+    uint32_t sectors_per_fat; // also determines the size of the in-memory FAT buffer
+    uint16_t mirror_flags;
+    uint16_t fs_version;
+    // 2
+    cluster_t root_directory; // cluster at which the fat32 constructor starts walking the root directory chain
+    // 1
+    uint16_t fs_info_sector; // sector number the fat32 constructor reads as a struct fs_info_sector
+    // usually at 6, 0x0000 or 0xffff if none
+    uint16_t backup_boot_sector;
+    uint8_t _reserved[12];
+    // for int $0x13
+    uint8_t drive_number;
+    uint8_t _reserved_for_current_head;
+    // 0x29
+    uint8_t ext_signature;
+    uint32_t serial_number;
+    char label[11]; // copied by the fat32 constructor up to the first 0x20 byte
+    char fs_type[8];
+    uint8_t _reserved_blank[420];
+    // 0x55, 0xaa
+    uint16_t magic;
+};
+
+struct PACKED fs_info_sector { // sector named by ext_boot_sector::fs_info_sector (512 bytes if PACKED removes padding)
+    // 0x41615252
+    uint32_t signature_one;
+    uint8_t _reserved[480];
+    // 0x61417272
+    uint32_t signature_two;
+    // may be incorrect
+    uint32_t free_clusters; // cached as fat32::free_clusters
+    // hint only
+    uint32_t next_free_cluster; // cached as fat32::next_free_cluster_hint
+    uint8_t _reserved_two[12];
+    // 0xaa550000
+    uint32_t sector_signature;
+};
+
+struct PACKED directory_entry { // one 8.3 directory record (32 bytes if PACKED removes padding)
+    char filename[8]; // load_dentry() copies it up to the first space; a leading 0x00 byte ends its scan
+    char extension[3];
+    struct PACKED {
+        uint8_t ro : 1;
+        uint8_t hidden : 1;
+        uint8_t system : 1;
+        uint8_t volume_label : 1; // entries with this bit set are skipped by load_dentry()
+        uint8_t subdir : 1; // selects the directory vs. file inode flag in load_dentry()
+        uint8_t archive : 1;
+        uint8_t _reserved : 2;
+    } attributes;
+    uint8_t _reserved;
+    uint8_t c_time_date[5];
+    uint16_t access_date;
+    uint16_t cluster_hi; // upper 16 bits of the start cluster, see fat32::_rearrange()
+    uint8_t m_time_date[4];
+    uint16_t cluster_lo; // lower 16 bits of the start cluster
+    uint32_t size; // stored as the inode size by load_dentry()
+};
+
+class fat32 : public virtual fs::vfs { // FAT32 driver on top of a block-device inode; overrides only load_dentry, inode_read and inode_stat
+private:
+    constexpr static uint32_t SECTOR_SIZE = 512;
+    constexpr static cluster_t EOC = 0xffffff8; // FAT values >= this (0x0ffffff8) end a cluster chain
+
+private:
+    uint32_t sector_cnt;
+    uint32_t sectors_per_fat;
+    uint32_t serial_number;
+    uint32_t free_clusters;
+    uint32_t next_free_cluster_hint;
+    cluster_t root_dir; // first cluster of the root directory
+    cluster_t data_region_offset; // a SECTOR number: reserved_sectors + fat_copies * sectors_per_fat
+    inode* device; // block device every sector is read from
+    uint16_t reserved_sectors;
+    uint8_t fat_copies;
+    uint8_t sectors_per_cluster;
+    char label[12]; // NUL-terminated copy of the volume label
+    cluster_t* fat; // in-memory FAT (sectors_per_fat * SECTOR_SIZE bytes); the constructor loads only its first 4 sectors
+
+    // buf MUST hold at least SECTOR_SIZE (512) bytes; a short read ends in syscall(0x03)
+    inline void read_sector(void* buf, uint32_t sector_no);
+
+    // buf MUST hold one whole cluster: SECTOR_SIZE * sectors_per_cluster bytes (4096 only if sectors_per_cluster is 8)
+    inline void read_cluster(void* buf, cluster_t no);
+
+    static inline cluster_t cl(const inode* ind) // start cluster kept in inode::impl
+    {
+        return reinterpret_cast<cluster_t>(ind->impl);
+    }
+
+    static inline cluster_t _rearrange(directory_entry* d) // join the two 16-bit halves of the start cluster
+    {
+        return (((cluster_t)d->cluster_hi) << 16) + d->cluster_lo;
+    }
+
+    static inline size_t _write_buf_n(char* buf, size_t buf_size, const char* src, size_t n) // copies min(n, buf_size) bytes, returns that count
+    {
+        if (n <= buf_size) {
+            memcpy(buf, src, n);
+            return n;
+        } else {
+            memcpy(buf, src, buf_size);
+            return buf_size;
+        }
+    }
+
+protected:
+    virtual int load_dentry(dentry* ent) override; // appends one child per directory record of ent's cluster chain
+
+public:
+    fat32(const fat32&) = delete;
+    explicit fat32(inode* _device); // parses the boot sector of _device and registers the root inode
+    ~fat32();
+
+    virtual size_t inode_read(inode* file, char* buf, size_t buf_size, size_t offset, size_t n) override;
+    virtual int inode_stat(dentry* ent, stat* st) override;
+};
+
+}; // namespace fs::fat

+ 75 - 31
include/kernel/vfs.hpp

@@ -1,9 +1,11 @@
 #pragma once
 
+#include <types/allocator.hpp>
 #include <types/hash_map.hpp>
 #include <types/list.hpp>
 #include <types/stdint.h>
 #include <types/types.h>
+#include <types/vector.hpp>
 
 #define INODE_FILE (1 << 0)
 #define INODE_DIR (1 << 1)
@@ -33,11 +35,7 @@ struct inode {
     void* impl;
     ino_t ino;
     vfs* fs;
-};
-
-struct dirent {
-    char name[128];
-    uint32_t ino;
+    size_t size;
 };
 
 union node_t {
@@ -67,11 +65,52 @@ struct special_node {
 struct stat {
     ino_t st_ino;
     node_t st_rdev;
+    size_t st_size;
     blksize_t st_blksize;
     blkcnt_t st_blocks;
 };
 
 class vfs {
+public:
+    struct dentry { // cached directory entry: a named link to an inode plus its already-loaded children
+    public:
+        using name_type = types::string<>;
+
+    private:
+        types::list<dentry> children; // storage for the child entries created by append()
+        types::hash_map<name_type, dentry*, types::string_hasher<const name_type&>> idx_children; // name -> child lookup used by find()
+
+    public:
+        dentry* parent; // nullptr for the root entry built by the vfs constructor
+        inode* ind;
+        // find() consults `present` only when `ind` is a directory; invalidate() clears both bits
+        union {
+            uint32_t v;
+            struct {
+                uint32_t present : 1;
+                uint32_t dirty : 1;
+            } in;
+        } flags;
+        name_type name;
+
+        explicit dentry(dentry* parent, inode* ind, const name_type& name);
+        explicit dentry(dentry* parent, inode* ind, name_type&& name);
+        dentry(const dentry& val) = delete;
+        dentry(dentry&& val); // re-parents the moved children to the new object
+
+        dentry& operator=(const dentry& val) = delete;
+        dentry& operator=(dentry&& val) = delete;
+
+        dentry* append(inode* ind, const name_type& name); // adds a child and indexes it by name; returns the child
+        dentry* append(inode* ind, name_type&& name);
+
+        dentry* find(const name_type& name); // loads the children on demand; nullptr with errno = ENOTFOUND if absent
+
+        dentry* replace(dentry* val); // points the parent's index entry for this name at val; returns this
+
+        void invalidate(void); // drops all cached children and clears the flags
+    };
+
 private:
     // TODO: use allocator designed for small objects
     using inode_list = types::list<inode>;
@@ -79,18 +118,23 @@ private:
 
 private:
     inode_list _inodes;
-    inode* _root_inode;
-    ino_t _last_inode_no;
     inode_index_cache_list _idx_inodes;
+    types::hash_map<dentry*, dentry*, types::linux_hasher<dentry*>> _mount_recover_list;
+    ino_t _last_inode_no;
 
 private:
     ino_t _assign_inode_id(void);
 
 protected:
-    inode* cache_inode(inode_flags flags, uint32_t perm, void* impl_data);
+    dentry _root;
+
+protected:
+    inode* cache_inode(inode_flags flags, uint32_t perm, size_t size, void* impl_data);
     inode* get_inode(ino_t ino);
     void register_root_node(inode* root);
 
+    virtual int load_dentry(dentry* ent);
+
 public:
     explicit vfs(void);
     vfs(const vfs&) = delete;
@@ -98,21 +142,23 @@ public:
     vfs(vfs&&) = delete;
     vfs& operator=(vfs&&) = delete;
 
-    inode* root(void) const;
+    constexpr dentry* root(void)
+    {
+        return &_root;
+    }
+
+    int mount(dentry* mnt, vfs* new_fs);
 
     virtual size_t inode_read(inode* file, char* buf, size_t buf_size, size_t offset, size_t n);
     virtual size_t inode_write(inode* file, const char* buf, size_t offset, size_t n);
-    virtual int inode_readdir(inode* dir, dirent* entry, size_t i);
-    virtual int inode_mkfile(inode* dir, const char* filename);
-    virtual int inode_mknode(inode* dir, const char* filename, union node_t sn);
-    virtual int inode_rmfile(inode* dir, const char* filename);
-    virtual int inode_mkdir(inode* dir, const char* dirname);
-    virtual int inode_stat(inode* dir, stat* stat, const char* dirname);
-    // requires inode_readdir to work
-    virtual inode* inode_findinode(inode* dir, const char* filename);
+    virtual int inode_mkfile(dentry* dir, const char* filename);
+    virtual int inode_mknode(dentry* dir, const char* filename, union node_t sn);
+    virtual int inode_rmfile(dentry* dir, const char* filename);
+    virtual int inode_mkdir(dentry* dir, const char* dirname);
+    virtual int inode_stat(dentry* dir, stat* stat);
 };
 
-extern struct inode* fs_root;
+extern fs::vfs::dentry* fs_root;
 
 void register_special_block(uint16_t major,
     uint16_t minor,
@@ -121,21 +167,19 @@ void register_special_block(uint16_t major,
     uint32_t data1,
     uint32_t data2);
 
+vfs* register_fs(vfs* fs);
+
 size_t vfs_read(inode* file, char* buf, size_t buf_size, size_t offset, size_t n);
 size_t vfs_write(inode* file, const char* buf, size_t offset, size_t n);
-int vfs_readdir(inode* dir, dirent* entry, size_t i);
-inode* vfs_findinode(inode* dir, const char* filename);
-int vfs_mkfile(inode* dir, const char* filename);
-int vfs_mknode(inode* dir, const char* filename, node_t sn);
-int vfs_rmfile(inode* dir, const char* filename);
-int vfs_mkdir(inode* dir, const char* dirname);
-
-// requires inode_findinode to work
-// @return pointer to the inode if found, nullptr if not
-inode* vfs_open(const char* path);
-
-// @return GB_OK if succeed, GB_FAILED if failed and set errno
-int vfs_stat(struct stat* stat, const char* path);
+int vfs_mkfile(fs::vfs::dentry* dir, const char* filename);
+int vfs_mknode(fs::vfs::dentry* dir, const char* filename, node_t sn);
+int vfs_rmfile(fs::vfs::dentry* dir, const char* filename);
+int vfs_mkdir(fs::vfs::dentry* dir, const char* dirname);
+int vfs_stat(const char* filename, stat* stat);
+int vfs_stat(fs::vfs::dentry* ent, stat* stat);
+
+// @return pointer to the dentry if found, nullptr if not
+fs::vfs::dentry* vfs_open(const char* path);
 
 } // namespace fs
 

+ 190 - 0
src/fs/fat.cpp

@@ -0,0 +1,190 @@
+#include <fs/fat.hpp>
+#include <kernel/mem.h>
+#include <kernel/mm.hpp>
+#include <kernel/syscall.hpp>
+#include <kernel/vfs.hpp>
+#include <types/allocator.hpp>
+#include <types/hash_map.hpp>
+#include <types/status.h>
+#include <types/stdint.h>
+
+namespace fs::fat {
+// Read sector `sector_no` of the backing device into buf.
+// buf must have room for SECTOR_SIZE bytes; a short read ends in syscall(0x03).
+inline void fat32::read_sector(void* buf, uint32_t sector_no)
+{
+    auto* const dst = (char*)buf;
+    const auto byte_offset = sector_no * SECTOR_SIZE;
+    const auto nread = vfs_read(device, dst, SECTOR_SIZE, byte_offset, SECTOR_SIZE);
+
+    if (nread == SECTOR_SIZE)
+        return;
+
+    syscall(0x03);
+}
+
+// Read data cluster `no` into buf, one sector at a time.
+// buf must have room for sectors_per_cluster * SECTOR_SIZE bytes.
+inline void fat32::read_cluster(void* buf, cluster_t no)
+{
+    // data clusters are numbered from 2, so rebase before converting to a sector number
+    const cluster_t index = no - 2;
+    // the data region begins after the reserved sectors and all FAT copies
+    const uint32_t first_sector = data_region_offset + index * sectors_per_cluster;
+
+    char* out = (char*)buf;
+    for (int s = 0; s < sectors_per_cluster; ++s, out += SECTOR_SIZE)
+        read_sector(out, first_sector + s);
+}
+
+// Populate `ent` with one child dentry per 8.3 record found in the cluster
+// chain of its directory inode, then mark the dentry as present so that
+// dentry::find() does not load (and duplicate) the children again.
+// @return GB_OK on success, GB_FAILED if the cluster buffer cannot be allocated
+int fat32::load_dentry(dentry* ent)
+{
+    // only the low 28 bits of a FAT32 table entry hold a cluster number
+    constexpr cluster_t CLUSTER_MASK = 0x0fffffff;
+    const uint32_t cluster_size = SECTOR_SIZE * sectors_per_cluster;
+    const uint32_t entries_per_cluster = cluster_size / sizeof(directory_entry);
+    const uint32_t fat_entries = sectors_per_fat * (SECTOR_SIZE / sizeof(cluster_t));
+
+    cluster_t next = cl(ent->ind);
+    // a start cluster of 0 (the ".." record of a first-level directory) denotes the root directory
+    if (next == 0)
+        next = root_dir;
+
+    // sized from the volume geometry: a cluster may be larger than 4096 bytes
+    auto* buf = (char*)k_malloc(cluster_size);
+    if (!buf)
+        return GB_FAILED;
+
+    bool end_of_dir = false;
+    while (!end_of_dir && next >= 2 && next < EOC) {
+        read_cluster(buf, next);
+        auto* records = reinterpret_cast<directory_entry*>(buf);
+
+        // bounded scan: a completely full cluster has no 0x00 terminator record
+        for (uint32_t idx = 0; idx < entries_per_cluster; ++idx) {
+            auto* d = &records[idx];
+
+            // a leading 0x00 byte marks the end of the directory
+            if (!d->filename[0]) {
+                end_of_dir = true;
+                break;
+            }
+            // a leading 0xe5 byte marks a deleted record
+            if ((uint8_t)d->filename[0] == 0xe5)
+                continue;
+            // volume labels; long-file-name records (attribute 0x0f) carry this bit too
+            if (d->attributes.volume_label)
+                continue;
+
+            auto* ind = cache_inode({ .in {
+                                        .file = !d->attributes.subdir,
+                                        .directory = d->attributes.subdir,
+                                        .mount_point = 0,
+                                        .special_node = 0,
+                                    } },
+                0777, d->size, (void*)_rearrange(d));
+
+            // rebuild "NAME.EXT" from the space-padded 8.3 fields
+            types::string<> fname;
+            for (int i = 0; i < 8 && d->filename[i] != ' '; ++i)
+                fname += d->filename[i];
+            if (d->extension[0] != ' ') {
+                fname += '.';
+                for (int i = 0; i < 3 && d->extension[i] != ' '; ++i)
+                    fname += d->extension[i];
+            }
+            ent->append(ind, fname);
+        }
+
+        // never index past the in-memory FAT
+        next = next < fat_entries ? (fat[next] & CLUSTER_MASK) : EOC;
+    }
+
+    k_free(buf);
+    ent->flags.in.present = 1;
+    return GB_OK;
+}
+
+// Attach to the FAT32 volume stored on the block device `_device`:
+// parse the boot sector and the FS information sector, cache the beginning
+// of the first FAT in memory and register the root directory inode.
+fat32::fat32(inode* _device)
+    : device(_device)
+    , label { 0 }
+{
+    // only the low 28 bits of a FAT32 table entry hold a cluster number
+    constexpr cluster_t CLUSTER_MASK = 0x0fffffff;
+    // number of FAT sectors loaded eagerly; the rest of the table reads as zero
+    constexpr uint32_t PRELOADED_FAT_SECTORS = 4;
+
+    char* buf = (char*)k_malloc(SECTOR_SIZE);
+    read_sector(buf, 0);
+
+    auto* info = reinterpret_cast<ext_boot_sector*>(buf);
+
+    sector_cnt = info->sectors_cnt;
+    sectors_per_fat = info->sectors_per_fat;
+    sectors_per_cluster = info->old.sectors_per_cluster;
+    serial_number = info->serial_number;
+    root_dir = info->root_directory;
+    reserved_sectors = info->old.reserved_sectors;
+    fat_copies = info->old.fat_copies;
+    // remembered now because buf is reused for the FS information sector below
+    const uint32_t fs_info_sector_no = info->fs_info_sector;
+
+    data_region_offset = reserved_sectors + fat_copies * sectors_per_fat;
+    fat = (cluster_t*)k_malloc(SECTOR_SIZE * sectors_per_fat);
+    // TODO: optimize
+    // a FAT shorter than the preload window must not be read past its buffer
+    const uint32_t preload = sectors_per_fat < PRELOADED_FAT_SECTORS
+        ? sectors_per_fat
+        : PRELOADED_FAT_SECTORS;
+    for (uint32_t i = 0; i < preload; ++i)
+        read_sector((char*)fat + i * SECTOR_SIZE, reserved_sectors + i);
+    for (uint32_t i = preload; i < sectors_per_fat; ++i)
+        memset((char*)fat + i * SECTOR_SIZE, 0x00, SECTOR_SIZE);
+
+    // the label is copied up to its first space (0x20) and NUL-terminated
+    int i = 0;
+    while (i < 11 && info->label[i] != 0x20) {
+        label[i] = info->label[i];
+        ++i;
+    }
+    label[i] = 0x00;
+
+    read_sector(buf, fs_info_sector_no);
+
+    auto* fsinfo = reinterpret_cast<fs_info_sector*>(buf);
+    free_clusters = fsinfo->free_clusters;
+    next_free_cluster_hint = fsinfo->next_free_cluster;
+
+    k_free(buf);
+
+    // count the clusters of the root directory chain to derive its size
+    const uint32_t fat_entries = sectors_per_fat * (SECTOR_SIZE / sizeof(cluster_t));
+    size_t _root_dir_clusters = 1;
+    cluster_t next = root_dir;
+    while (next < fat_entries) {
+        next = fat[next] & CLUSTER_MASK;
+        // free/reserved values and end-of-chain markers both terminate the walk
+        if (next < 2 || next >= EOC)
+            break;
+        ++_root_dir_clusters;
+    }
+    auto* n = cache_inode({ INODE_MNT | INODE_DIR }, 0777, _root_dir_clusters * sectors_per_cluster * SECTOR_SIZE, (void*)root_dir);
+    register_root_node(n);
+}
+
+// Release the in-memory copy of the file allocation table.
+fat32::~fat32()
+{
+    auto* const table = fat;
+    k_free(table);
+}
+
+// Read up to `n` bytes of `file`, starting `offset` bytes into it, into `buf`.
+// Never writes more than `buf_size` bytes; for regular files the read also
+// stops at the end of the file instead of returning cluster padding.
+// @return the number of bytes copied into buf
+size_t fat32::inode_read(inode* file, char* buf, size_t buf_size, size_t offset, size_t n)
+{
+    // only the low 28 bits of a FAT32 table entry hold a cluster number
+    constexpr cluster_t CLUSTER_MASK = 0x0fffffff;
+    const uint32_t cluster_size = SECTOR_SIZE * sectors_per_cluster;
+    const uint32_t fat_entries = sectors_per_fat * (SECTOR_SIZE / sizeof(cluster_t));
+
+    // the caller's buffer bounds every copy, including whole-cluster reads
+    if (n > buf_size)
+        n = buf_size;
+
+    // directory inodes created by load_dentry() carry size 0, so only files are clamped
+    if (file->flags.in.file) {
+        if (offset >= file->size)
+            return 0;
+        if (n > file->size - offset)
+            n = file->size - offset;
+    }
+    if (n == 0)
+        return 0;
+
+    auto* bounce = (char*)k_malloc(cluster_size);
+    if (!bounce)
+        return 0;
+
+    size_t copied = 0;
+    cluster_t cur = cl(file);
+    while (copied < n && cur >= 2 && cur < EOC) {
+        if (offset >= cluster_size) {
+            // this cluster lies entirely before the requested range
+            offset -= cluster_size;
+        } else {
+            size_t chunk = cluster_size - offset;
+            if (chunk > n - copied)
+                chunk = n - copied;
+
+            if (offset == 0 && chunk == cluster_size) {
+                // a whole cluster is wanted: read straight into the caller's buffer
+                read_cluster(buf + copied, cur);
+            } else {
+                read_cluster(bounce, cur);
+                memcpy(buf + copied, bounce + offset, chunk);
+            }
+
+            copied += chunk;
+            offset = 0;
+        }
+
+        // never index past the in-memory FAT
+        cur = cur < fat_entries ? (fat[cur] & CLUSTER_MASK) : EOC;
+    }
+
+    k_free(bounce);
+    return copied;
+}
+
+// Fill `st` from the inode behind `ent`.
+// Sizes are reported in 4096-byte blocks; st_rdev is the device number for
+// special nodes and 0 for everything else (same convention as tmpfs).
+// @return GB_OK
+int fat32::inode_stat(dentry* ent, stat* st)
+{
+    const inode* ind = ent->ind;
+
+    st->st_ino = ind->ino;
+    st->st_size = ind->size;
+    st->st_blksize = 4096;
+    st->st_blocks = (ind->size + 4095) / 4096;
+    // always assigned so callers never see a stale st_rdev from a previous stat
+    st->st_rdev.v = ind->flags.in.special_node
+        ? reinterpret_cast<uint32_t>(ind->impl)
+        : 0;
+    return GB_OK;
+}
+} // namespace fs::fat

+ 92 - 29
src/kernel/hw/ata.cpp

@@ -1,4 +1,5 @@
 #include <asm/port_io.h>
+#include <fs/fat.hpp>
 #include <kernel/hw/ata.hpp>
 #include <kernel/stdio.h>
 #include <kernel/syscall.hpp>
@@ -126,6 +127,74 @@ int hw::ata::write_sector(const char* buf, uint32_t lba_low, uint16_t lba_high)
 }
 
 static hw::ata* ata_pri;
+static hw::ata* ata_sec;
+constexpr hw::ata** p_ata_pri = &ata_pri;
+constexpr hw::ata** p_ata_sec = &ata_sec;
+
+// Read handler for block devices backed by the ATA bus `*ata_bus`.
+// data1: offset sectors
+// data2: limit sectors
+// Copies up to `n` bytes starting `offset` bytes into the device window
+// [data1, data1 + data2) into `buf`, never more than `buf_size` bytes.
+// @return the number of bytes copied
+template <hw::ata** ata_bus>
+size_t _ata_read(fs::special_node* sn, char* buf, size_t buf_size, size_t offset, size_t n)
+{
+    constexpr size_t SECTOR = 512;
+
+    // the caller's buffer bounds the transfer
+    if (n > buf_size)
+        n = buf_size;
+
+    char b[SECTOR] {};
+    size_t done = 0;
+    // byte position inside the first sector; later sectors are read from their start
+    size_t in_sector = offset % SECTOR;
+    const size_t limit = sn->data1 + sn->data2;
+
+    for (size_t lba = sn->data1 + offset / SECTOR; done < n && lba < limit; ++lba) {
+        (*ata_bus)->read_sector(b, lba, 0);
+
+        // take the rest of this sector, but never more than is still wanted
+        size_t to_copy = SECTOR - in_sector;
+        if (to_copy > n - done)
+            to_copy = n - done;
+
+        memcpy(buf + done, b + in_sector, to_copy);
+        done += to_copy;
+        in_sector = 0;
+    }
+    return done;
+}
+
+struct PACKED mbr_part_entry { // one slot of the MBR partition table (16 bytes if PACKED removes padding)
+    uint8_t attr;
+    uint8_t chs_start[3];
+    uint8_t type; // mbr_part_probe() treats 0 as an unused slot
+    uint8_t chs_end[3];
+    uint32_t lba_start; // registered as data1 (offset sectors) of the partition device
+    uint32_t cnt; // registered as data2 (limit sectors) of the partition device
+};
+
+struct PACKED mbr { // first sector of a drive as read by mbr_part_probe() (512 bytes if PACKED removes padding)
+    uint8_t code[440];
+    uint32_t signature;
+    uint16_t reserved;
+    struct mbr_part_entry parts[4]; // the four partition table slots
+    uint16_t magic;
+};
+
+// Scan the MBR partition table of `drive` and publish every used partition.
+// The k-th used partition (k counting from 0) is registered as block device
+// (major, minor + k) and gets a /dev node named "hda<k + 1>" carrying that
+// same device number. Nothing is registered when /dev is missing, the sector
+// cannot be read completely or the 0xaa55 boot signature is absent.
+static inline void mbr_part_probe(fs::inode* drive, uint16_t major, uint16_t minor)
+{
+    struct mbr hda_mbr { };
+    auto* dev = fs::vfs_open("/dev");
+    if (!dev)
+        return;
+
+    if (fs::vfs_read(drive, (char*)&hda_mbr, 512, 0, 512) != 512)
+        return;
+
+    // the on-disk bytes 0x55, 0xaa read as a little-endian 16-bit value
+    if (hda_mbr.magic != 0xaa55)
+        return;
+
+    // the last character is bumped once per partition: hda1, hda2, ...
+    char node_name[] = "hda0";
+
+    for (const auto& part : hda_mbr.parts) {
+        if (!part.type)
+            continue;
+
+        fs::register_special_block(major, minor,
+            _ata_read<p_ata_pri>,
+            nullptr,
+            part.lba_start, part.cnt);
+
+        // the node must carry the device number that was just registered
+        ++node_name[3];
+        fs::node_t node {};
+        node.in.major = major;
+        node.in.minor = minor;
+        fs::vfs_mknode(dev, node_name, node);
+
+        ++minor;
+    }
+}
 
 void hw::init_ata(void* data)
 {
@@ -133,37 +202,31 @@ void hw::init_ata(void* data)
         syscall(0x03);
 
     ata_pri = types::kernel_allocator_new<ata>(ATA_PRIMARY_BUS_BASE);
-    ata_pri->identify();
-    ata_pri->select(true);
+    if (ata_pri->identify())
+        ata_pri->select(true);
+
+    ata_sec = types::kernel_allocator_new<ata>(ATA_SECONDARY_BUS_BASE);
+    if (ata_pri->identify())
+        ata_pri->select(true);
 
+    // data1: offset sectors
+    // data2: limit sectors
     fs::register_special_block(
         2, 0,
-        [](fs::special_node*, char* buf, size_t buf_size, size_t offset, size_t n) -> size_t {
-            // TODO: check buf_size
-            char b[512] {};
-            char* orig_buf = buf;
-            size_t start = offset / 512;
-            size_t end = (offset + n + 511) / 512;
-            for (size_t i = start; i < end; ++i) {
-                ata_pri->read_sector(b, i, 0);
-                size_t to_copy = 0;
-                if (offset)
-                    to_copy = 512 - offset;
-                else
-                    to_copy = n > 512 ? 512 : n;
-                memcpy(buf, b + offset, to_copy);
-                offset = 0;
-                buf += to_copy;
-                n -= to_copy;
-            }
-            return buf - orig_buf;
-        },
-        [](fs::special_node*, const char* buf, size_t offset, size_t n) -> size_t {
-            syscall(0x03);
-            return n;
-        }, 0, 0);
+        _ata_read<p_ata_pri>,
+        nullptr,
+        0,
+        0xffffffff);
+
+    // data1: offset sectors
+    // data2: limit sectors
+    fs::register_special_block(
+        2, 8,
+        _ata_read<p_ata_sec>,
+        nullptr,
+        0,
+        0xffffffff);
+
     auto* hda = fs::vfs_open("/dev/hda");
-    char buf[512] {};
-    fs::vfs_read(hda, buf, 512, 1, 512);
-    fs::vfs_write(hda, buf, 1, 512);
+    mbr_part_probe(hda->ind, 2, 1);
 }

+ 183 - 138
src/kernel/vfs.cpp

@@ -21,36 +21,119 @@ struct tmpfs_file_entry {
     char filename[128];
 };
 
+// Create an entry for inode `node` below `owner`, copying `label` as its name; all flags start cleared.
+fs::vfs::dentry::dentry(dentry* owner, inode* node, const name_type& label)
+    : parent { owner }
+    , ind { node }
+    , flags { 0 }
+    , name(label)
+{
+}
+// Create an entry for inode `node` below `owner`, taking over `label` as its name; all flags start cleared.
+fs::vfs::dentry::dentry(dentry* owner, inode* node, name_type&& label)
+    : parent { owner }
+    , ind { node }
+    , flags { 0 }
+    , name(types::move(label))
+{
+}
+// Move constructor: takes over the children, their name index and the name of `val`.
+fs::vfs::dentry::dentry(dentry&& val)
+    : children(types::move(val.children))
+    , idx_children(types::move(val.idx_children))
+    , parent(val.parent)
+    , ind(val.ind)
+    , flags { val.flags }
+    , name(types::move(val.name))
+{
+    // the adopted children still name `val` as their parent; point them at this object
+    for (auto child = children.begin(); child != children.end(); ++child)
+        child->parent = this;
+}
+// Add a child entry for `ind` under a copy of `name` and index it by that name.
+// @return the newly created child
+fs::vfs::dentry* fs::vfs::dentry::append(inode* ind, const name_type& name)
+{
+    auto slot = children.emplace_back(this, ind, name);
+    dentry* const added = slot.ptr();
+    // the index key is the child's own name
+    idx_children.insert(added->name, added);
+    return added;
+}
+// Add a child entry for `ind`, moving `name` into it, and index it by that name.
+// @return the newly created child
+fs::vfs::dentry* fs::vfs::dentry::append(inode* ind, name_type&& name)
+{
+    auto slot = children.emplace_back(this, ind, types::move(name));
+    dentry* const added = slot.ptr();
+    // `name` has been moved from; the index key is the child's own name
+    idx_children.insert(added->name, added);
+    return added;
+}
+// Look up the child called `name`, loading the children of a directory from
+// its filesystem first when they are not cached yet.
+// @return the child, or nullptr if loading failed or no such child exists
+//         (errno = ENOTFOUND in the latter case)
+fs::vfs::dentry* fs::vfs::dentry::find(const name_type& name)
+{
+    if (ind->flags.in.directory && !flags.in.present) {
+        // a failed load leaves the cache incomplete, so do not answer from it
+        if (ind->fs->load_dentry(this) != GB_OK)
+            return nullptr;
+    }
+
+    auto iter = idx_children.find(name);
+    if (!iter) {
+        errno = ENOTFOUND;
+        return nullptr;
+    }
+
+    return iter->value;
+}
+// Make the parent's name index resolve this entry's name to `val` instead.
+// The entry itself stays in the parent's child list.
+// @return this entry (the one that was covered)
+fs::vfs::dentry* fs::vfs::dentry::replace(dentry* val)
+{
+    // TODO: prevent the dirent to be swapped out of memory
+    auto slot = parent->idx_children.find(this->name);
+    slot->value = val;
+    return this;
+}
+// Drop every cached child and clear both flags; the next find() on a
+// directory therefore loads the children again.
+void fs::vfs::dentry::invalidate(void)
+{
+    // TODO: write back
+    children.clear();
+    idx_children.clear();
+
+    flags.in.dirty = 0;
+    flags.in.present = 0;
+}
 fs::vfs::vfs(void)
-    : _root_inode(nullptr)
-    , _last_inode_no(0)
+    : _last_inode_no(0)
+    , _root(nullptr, nullptr, "/")
 {
 }
 fs::ino_t fs::vfs::_assign_inode_id(void)
 {
     return ++_last_inode_no;
 }
-fs::inode* fs::vfs::cache_inode(inode_flags flags, uint32_t perm, void* impl_data)
+fs::inode* fs::vfs::cache_inode(inode_flags flags, uint32_t perm, size_t size, void* impl_data)
 {
-    auto iter = _inodes.emplace_back(inode { flags, perm, impl_data, _assign_inode_id(), this });
+    auto iter = _inodes.emplace_back(inode { flags, perm, impl_data, _assign_inode_id(), this, size });
     _idx_inodes.insert(iter->ino, iter.ptr());
     return iter.ptr();
 }
 fs::inode* fs::vfs::get_inode(ino_t ino)
 {
     auto iter = _idx_inodes.find(ino);
-    if (iter != iter.npos)
-        return iter->value;
-    else
+    // TODO: load inode from disk if not found
+    if (!iter)
         return nullptr;
+    else
+        return iter->value;
 }
 void fs::vfs::register_root_node(inode* root)
 {
-    _root_inode = root;
+    if (!_root.ind)
+        _root.ind = root;
 }
-fs::inode* fs::vfs::root(void) const
+int fs::vfs::load_dentry(dentry*)
 {
-    return _root_inode;
+    syscall(0x03);
+    return GB_FAILED;
+}
+// Cover the directory entry `mnt` with the root of `new_fs`: lookups of
+// mnt's name in its parent resolve to the new root afterwards. The covered
+// entry is remembered in _mount_recover_list, keyed by the new root.
+// @return GB_OK on success; GB_FAILED with errno = ENOTDIR if `mnt` is not a
+//         directory, or errno = ENOTFOUND if `mnt` has no parent entry
+int fs::vfs::mount(dentry* mnt, vfs* new_fs)
+{
+    if (!mnt->ind->flags.in.directory) {
+        errno = ENOTDIR;
+        return GB_FAILED;
+    }
+
+    // replace() rewires the parent's name index, so an entry without a parent
+    // (a filesystem root) cannot be covered this way
+    if (!mnt->parent) {
+        errno = ENOTFOUND;
+        return GB_FAILED;
+    }
+
+    auto* new_ent = new_fs->root();
+
+    new_ent->parent = mnt->parent;
+    new_ent->name = mnt->name;
+
+    auto* orig_ent = mnt->replace(new_ent);
+    _mount_recover_list.insert(new_ent, orig_ent);
+    return GB_OK;
 }
 size_t fs::vfs::inode_read(inode*, char*, size_t, size_t, size_t)
 {
@@ -62,45 +145,27 @@ size_t fs::vfs::inode_write(inode*, const char*, size_t, size_t)
     syscall(0x03);
     return 0xffffffff;
 }
-int fs::vfs::inode_readdir(inode*, dirent*, size_t)
-{
-    syscall(0x03);
-    return GB_FAILED;
-}
-fs::inode* fs::vfs::inode_findinode(inode* dir, const char* filename)
-{
-    fs::dirent ent {};
-    size_t i = 0;
-    // TODO: if the inode is a mount point, the ino MIGHT BE THE SAME
-    while (inode_readdir(dir, &ent, i) == GB_OK) {
-        if (strcmp(ent.name, filename) == 0) {
-            return get_inode(ent.ino);
-        }
-        ++i;
-    }
-    return nullptr;
-}
-int fs::vfs::inode_mkfile(inode*, const char*)
+int fs::vfs::inode_mkfile(dentry*, const char*)
 {
     syscall(0x03);
     return GB_FAILED;
 }
-int fs::vfs::inode_mknode(inode*, const char*, node_t)
+int fs::vfs::inode_mknode(dentry*, const char*, node_t)
 {
     syscall(0x03);
     return GB_FAILED;
 }
-int fs::vfs::inode_rmfile(inode*, const char*)
+int fs::vfs::inode_rmfile(dentry*, const char*)
 {
     syscall(0x03);
     return GB_FAILED;
 }
-int fs::vfs::inode_mkdir(inode*, const char*)
+int fs::vfs::inode_mkdir(dentry*, const char*)
 {
     syscall(0x03);
     return GB_FAILED;
 }
-int fs::vfs::inode_stat(inode*, stat*, const char*)
+int fs::vfs::inode_stat(dentry*, stat*)
 {
     syscall(0x03);
     return GB_FAILED;
@@ -127,12 +192,28 @@ protected:
         };
         snprintf(ent.filename, sizeof(ent.filename), filename);
         fes->push_back(ent);
+        dir->size += sizeof(tmpfs_file_entry);
+    }
+
+    // Rebuild the children of `ent` from the tmpfs_file_entry list stored in its inode.
+    // @return GB_OK on success; GB_FAILED with errno = ENOTDIR if `ent` is not a directory
+    virtual int load_dentry(dentry* ent) override
+    {
+        auto* dir = ent->ind;
+
+        if (dir->flags.in.directory) {
+            auto* listing = static_cast<vector<tmpfs_file_entry>*>(dir->impl);
+            for (const auto& item : *listing)
+                ent->append(get_inode(item.ino), item.filename);
+
+            // mark the cache as loaded for dentry::find()
+            ent->flags.in.present = 1;
+            return GB_OK;
+        }
+
+        errno = ENOTDIR;
+        return GB_FAILED;
     }
 
 public:
     explicit tmpfs(void)
     {
-        auto& in = *cache_inode({ INODE_DIR | INODE_MNT }, 0777, mk_fe_vector());
+        auto& in = *cache_inode({ INODE_DIR | INODE_MNT }, 0777, 0, mk_fe_vector());
 
         mklink(&in, &in, ".");
         mklink(&in, &in, "..");
@@ -140,27 +221,31 @@ public:
         register_root_node(&in);
     }
 
-    virtual int inode_mkfile(fs::inode* dir, const char* filename) override
+    virtual int inode_mkfile(dentry* dir, const char* filename) override
     {
-        auto& file = *cache_inode({ .v = INODE_FILE }, 0777, mk_data_vector());
-        mklink(dir, &file, filename);
+        auto& file = *cache_inode({ .v = INODE_FILE }, 0777, 0, mk_data_vector());
+        mklink(dir->ind, &file, filename);
+        dir->invalidate();
         return GB_OK;
     }
 
-    virtual int inode_mknode(fs::inode* dir, const char* filename, fs::node_t sn) override
+    virtual int inode_mknode(dentry* dir, const char* filename, fs::node_t sn) override
     {
-        auto& node = *cache_inode({ .v = INODE_NODE }, 0777, (void*)sn.v);
-        mklink(dir, &node, filename);
+        auto& node = *cache_inode({ .v = INODE_NODE }, 0777, 0, (void*)sn.v);
+        mklink(dir->ind, &node, filename);
+        dir->invalidate();
         return GB_OK;
     }
 
-    virtual int inode_mkdir(fs::inode* dir, const char* dirname) override
+    virtual int inode_mkdir(dentry* dir, const char* dirname) override
     {
-        auto& new_dir = *cache_inode({ .v = INODE_DIR }, 0777, mk_fe_vector());
+        auto& new_dir = *cache_inode({ .v = INODE_DIR }, 0777, 0, mk_fe_vector());
         mklink(&new_dir, &new_dir, ".");
 
-        mklink(dir, &new_dir, dirname);
-        mklink(&new_dir, dir, "..");
+        mklink(dir->ind, &new_dir, dirname);
+        mklink(&new_dir, dir->ind, "..");
+
+        dir->invalidate();
         return GB_OK;
     }
 
@@ -199,48 +284,21 @@ public:
         return n;
     }
 
-    virtual int inode_readdir(fs::inode* dir, fs::dirent* entry, size_t i) override
-    {
-        if (dir->flags.in.directory != 1) {
-            errno = ENOTDIR;
-            return GB_FAILED;
-        }
-
-        auto* fes = static_cast<vector<tmpfs_file_entry>*>(dir->impl);
-
-        if (i >= fes->size()) {
-            errno = ENOENT;
-            return GB_FAILED;
-        }
-
-        entry->ino = fes->at(i).ino;
-        snprintf(entry->name, sizeof(entry->name), fes->at(i).filename);
-
-        return GB_OK;
-    }
-
-    virtual int inode_stat(fs::inode* dir, fs::stat* stat, const char* filename) override
+    virtual int inode_stat(dentry* dir, fs::stat* stat) override
     {
-        // for later use
-        // auto* fes = static_cast<vector<struct tmpfs_file_entry>*>(dir->impl);
-
-        auto* file_inode = vfs_findinode(dir, filename);
-
-        if (!file_inode) {
-            errno = ENOENT;
-            return GB_FAILED;
-        }
+        auto* file_inode = dir->ind;
 
         stat->st_ino = file_inode->ino;
+        stat->st_size = file_inode->size;
         if (file_inode->flags.in.file) {
             stat->st_rdev.v = 0;
             stat->st_blksize = 1;
-            stat->st_blocks = static_cast<vector<char>*>(file_inode->impl)->size();
+            stat->st_blocks = file_inode->size;
         }
         if (file_inode->flags.in.directory) {
             stat->st_rdev.v = 0;
             stat->st_blksize = sizeof(tmpfs_file_entry);
-            stat->st_blocks = static_cast<vector<tmpfs_file_entry>*>(file_inode->impl)->size();
+            stat->st_blocks = file_inode->size;
         }
         if (file_inode->flags.in.special_node) {
             stat->st_rdev.v = (uint32_t)file_inode->impl;
@@ -291,32 +349,24 @@ size_t fs::vfs_write(fs::inode* file, const char* buf, size_t offset, size_t n)
         return file->fs->inode_write(file, buf, offset, n);
     }
 }
-int fs::vfs_readdir(fs::inode* dir, fs::dirent* entry, size_t i)
+int fs::vfs_mkfile(fs::vfs::dentry* dir, const char* filename)
 {
-    return dir->fs->inode_readdir(dir, entry, i);
+    return dir->ind->fs->inode_mkfile(dir, filename);
 }
-fs::inode* fs::vfs_findinode(fs::inode* dir, const char* filename)
+int fs::vfs_mknode(fs::vfs::dentry* dir, const char* filename, fs::node_t sn)
 {
-    return dir->fs->inode_findinode(dir, filename);
+    return dir->ind->fs->inode_mknode(dir, filename, sn);
 }
-int fs::vfs_mkfile(fs::inode* dir, const char* filename)
+int fs::vfs_rmfile(fs::vfs::dentry* dir, const char* filename)
 {
-    return dir->fs->inode_mkfile(dir, filename);
+    return dir->ind->fs->inode_rmfile(dir, filename);
 }
-int fs::vfs_mknode(fs::inode* dir, const char* filename, fs::node_t sn)
+int fs::vfs_mkdir(fs::vfs::dentry* dir, const char* dirname)
 {
-    return dir->fs->inode_mknode(dir, filename, sn);
-}
-int fs::vfs_rmfile(fs::inode* dir, const char* filename)
-{
-    return dir->fs->inode_rmfile(dir, filename);
-}
-int fs::vfs_mkdir(fs::inode* dir, const char* dirname)
-{
-    return dir->fs->inode_mkdir(dir, dirname);
+    return dir->ind->fs->inode_mkdir(dir, dirname);
 }
 
-fs::inode* fs::vfs_open(const char* path)
+fs::vfs::dentry* fs::vfs_open(const char* path)
 {
     if (path[0] == '/' && path[1] == 0x00) {
         return fs::fs_root;
@@ -329,13 +379,11 @@ fs::inode* fs::vfs_open(const char* path)
     case '/':
         while (true) {
             if (path[n] == 0x00) {
-                string fname(path, n);
-                cur = vfs_findinode(cur, fname.c_str());
+                cur = cur->find(string(path, n));
                 return cur;
             }
             if (path[n] == '/') {
-                string fname(path, n);
-                cur = vfs_findinode(cur, fname.c_str());
+                cur = cur->find(string(path, n));
                 if (path[n + 1] == 0x00) {
                     return cur;
                 } else {
@@ -358,31 +406,18 @@ fs::inode* fs::vfs_open(const char* path)
     }
     return nullptr;
 }
-
-int fs::vfs_stat(struct stat* stat, const char* _path)
+int fs::vfs_stat(const char* filename, stat* stat)
 {
-    if (_path[0] == '/' && _path[1] == 0x00)
-        return fs_root->fs->inode_stat(fs_root, stat, ".");
-
-    string path(_path);
-    auto iter = path.back();
-    while (*(iter - 1) != '/')
-        --iter;
-    string filename(&*iter);
-    string parent_path = path.substr(0, &*iter - path.data());
-
-    auto* dir_inode = vfs_open(parent_path.c_str());
-
-    if (!dir_inode) {
-        errno = ENOENT;
-        return GB_FAILED;
-    }
-
-    return dir_inode->fs->inode_stat(dir_inode, stat, filename.c_str());
+    auto ent = vfs_open(filename);
+    return vfs_stat(ent, stat);
+}
+// Fill `stat` for the entry `ent` by delegating to the filesystem owning its inode.
+// @return the filesystem's result, or GB_FAILED with errno = ENOENT when `ent`
+//         is null (e.g. the result of a failed vfs_open()) or has no inode
+int fs::vfs_stat(fs::vfs::dentry* ent, stat* stat)
+{
+    if (!ent || !ent->ind) {
+        errno = ENOENT;
+        return GB_FAILED;
+    }
+
+    return ent->ind->fs->inode_stat(ent, stat);
 }
 
-fs::inode* fs::fs_root;
-static tmpfs* rootfs;
+fs::vfs::dentry* fs::fs_root;
+static types::list<fs::vfs*>* fs_es;
 
 void fs::register_special_block(
     uint16_t major,
@@ -399,6 +434,12 @@ void fs::register_special_block(
     sn.data2 = data2;
 }
 
+// Record `fs` in the global filesystem list and hand the same pointer back.
+fs::vfs* fs::register_fs(vfs* fs)
+{
+    auto& registry = *fs_es;
+    registry.push_back(fs);
+    return fs;
+}
+
 size_t b_null_read(fs::special_node*, char* buf, size_t buf_size, size_t, size_t n)
 {
     if (n >= buf_size)
@@ -413,31 +454,35 @@ size_t b_null_write(fs::special_node*, const char*, size_t, size_t n)
 
 void init_vfs(void)
 {
+    using namespace fs;
     // null
-    fs::register_special_block(0, 0, b_null_read, b_null_write, 0, 0);
+    register_special_block(0, 0, b_null_read, b_null_write, 0, 0);
+
+    fs_es = types::kernel_allocator_new<types::list<vfs*>>();
 
-    rootfs = allocator_traits<kernel_allocator<tmpfs>>::allocate_and_construct();
-    fs::fs_root = rootfs->root();
+    auto* rootfs = types::kernel_allocator_new<tmpfs>();
+    fs_es->push_back(rootfs);
+    fs_root = rootfs->root();
 
-    fs::vfs_mkdir(fs::fs_root, "dev");
-    fs::vfs_mkdir(fs::fs_root, "root");
-    fs::vfs_mkfile(fs::fs_root, "init");
+    vfs_mkdir(fs_root, "dev");
+    vfs_mkdir(fs_root, "root");
+    vfs_mkfile(fs_root, "init");
 
-    auto* init = fs::vfs_open("/init");
+    auto* init = vfs_open("/init");
     const char* str = "#/bin/sh\nexec /bin/sh\n";
-    fs::vfs_write(init, str, 0, strlen(str));
+    vfs_write(init->ind, str, 0, strlen(str));
 
-    auto* dev = fs::vfs_open("/dev");
-    fs::vfs_mknode(dev, "null", { .in { .major = 0, .minor = 0 } });
-    fs::vfs_mknode(dev, "console", { .in { .major = 1, .minor = 0 } });
-    fs::vfs_mknode(dev, "hda", { .in { .major = 2, .minor = 0 } });
+    auto* dev = vfs_open("/dev");
+    vfs_mknode(dev, "null", { .in { .major = 0, .minor = 0 } });
+    vfs_mknode(dev, "console", { .in { .major = 1, .minor = 0 } });
+    vfs_mknode(dev, "hda", { .in { .major = 2, .minor = 0 } });
 
-    fs::stat _stat {};
+    stat _stat {};
 
-    fs::vfs_stat(&_stat, "/init");
-    fs::vfs_stat(&_stat, "/");
-    fs::vfs_stat(&_stat, "/dev");
-    fs::vfs_stat(&_stat, "/dev/null");
-    fs::vfs_stat(&_stat, "/dev/console");
-    fs::vfs_stat(&_stat, "/dev/hda");
+    vfs_stat("/init", &_stat);
+    vfs_stat("/", &_stat);
+    vfs_stat("/dev", &_stat);
+    vfs_stat("/dev/null", &_stat);
+    vfs_stat("/dev/console", &_stat);
+    vfs_stat("/dev/hda", &_stat);
 }