
feat: implement page cache for efficient vfs io

zhuowei shao, 7 months ago
parent commit af512033d5
7 changed files with 531 additions and 64 deletions
  1. src/fs/ext4.rs (+38 -4)
  2. src/fs/fat32.rs (+62 -6)
  3. src/fs/tmpfs.rs (+42 -45)
  4. src/kernel/mem.rs (+2 -0)
  5. src/kernel/mem/page_alloc/raw_page.rs (+46 -4)
  6. src/kernel/mem/page_cache.rs (+324 -0)
  7. src/kernel/vfs/inode.rs (+17 -5)

+ 38 - 4
src/fs/ext4.rs

@@ -1,5 +1,6 @@
-use core::sync::atomic::{AtomicU32, AtomicU64};
+use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 
+use crate::kernel::mem::{PageCache, PageCacheBackend};
 use crate::{
     io::{Buffer, ByteBuffer},
     kernel::{
@@ -18,6 +19,7 @@ use crate::{
     path::Path,
     prelude::*,
 };
+use alloc::sync::Weak;
 use alloc::{
     collections::btree_map::{BTreeMap, Entry},
     sync::Arc,
@@ -92,7 +94,7 @@ impl Ext4Fs {
                 let mode = *idata.mode.get_mut();
                 if s_isreg(mode) {
                     vacant
-                        .insert(Ext4Inode::File(Arc::new(FileInode { idata })))
+                        .insert(Ext4Inode::File(FileInode::new(idata)))
                         .clone()
                         .into_inner()
                 } else if s_isdir(mode) {
@@ -103,7 +105,7 @@ impl Ext4Fs {
                 } else {
                     println_warn!("ext4: Unsupported inode type: {mode:#o}");
                     vacant
-                        .insert(Ext4Inode::File(Arc::new(FileInode { idata })))
+                        .insert(Ext4Inode::File(FileInode::new(idata)))
                         .clone()
                         .into_inner()
                 }
@@ -174,15 +176,47 @@ impl Ext4Inode {
 }
 
 define_struct_inode! {
-    struct FileInode;
+    struct FileInode {
+        page_cache: PageCache,
+    }
 }
 
 define_struct_inode! {
     struct DirInode;
 }
 
+impl FileInode {
+    fn new(idata: InodeData) -> Arc<Self> {
+        let size = idata.size.load(Ordering::Relaxed) as usize;
+        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
+            idata,
+            page_cache: PageCache::new(weak_self.clone(), size),
+        });
+
+        inode
+    }
+}
+
+impl PageCacheBackend for FileInode {
+    fn read_page(&self, page: &mut crate::kernel::mem::CachePage, offset: usize) -> KResult<usize> {
+        self.read_direct(page, offset)
+    }
+
+    fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult<usize> {
+        todo!()
+    }
+}
+
 impl Inode for FileInode {
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.page_cache)
+    }
+
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Task::block_on(self.page_cache.read(buffer, offset))
+    }
+
+    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 

+ 62 - 6
src/fs/fat32.rs

@@ -1,13 +1,19 @@
 mod dir;
 mod file;
 
+use crate::io::Stream;
 use crate::kernel::constants::EIO;
+use crate::kernel::mem::AsMemoryBlock;
+use crate::kernel::vfs::inode::WriteOffset;
 use crate::{
     io::{Buffer, ByteBuffer, UninitBuffer},
     kernel::{
         block::{make_device, BlockDevice, BlockDeviceRequest},
         constants::{S_IFDIR, S_IFREG},
-        mem::paging::Page,
+        mem::{
+            paging::Page,
+            {CachePage, PageCache, PageCacheBackend},
+        },
         vfs::{
             dentry::Dentry,
             inode::{define_struct_inode, Ino, Inode, InodeData},
@@ -32,6 +38,8 @@ use file::ClusterRead;
 
 type ClusterNo = u32;
 
+const SECTOR_SIZE: usize = 512;
+
 #[derive(Clone, Copy)]
 #[repr(C, packed)]
 struct Bootsector {
@@ -231,13 +239,16 @@ impl FatInode {
 }
 
 define_struct_inode! {
-    struct FileInode;
+    struct FileInode {
+        page_cache: PageCache,
+    }
 }
 
 impl FileInode {
     fn new(ino: Ino, weak: Weak<FatFs>, size: u32) -> Arc<Self> {
-        let inode = Arc::new(Self {
+        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
             idata: InodeData::new(ino, weak),
+            page_cache: PageCache::new(weak_self.clone(), size as usize),
         });
 
         // Safety: We are initializing the inode
@@ -250,7 +261,15 @@ impl FileInode {
 }
 
 impl Inode for FileInode {
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.page_cache)
+    }
+
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Task::block_on(self.page_cache.read(buffer, offset))
+    }
+
+    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
         let fat = Task::block_on(vfs.fat.read());
@@ -259,16 +278,53 @@ impl Inode for FileInode {
             return Ok(0);
         }
 
-        let iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).read(vfs, offset);
+        let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE;
+        assert!(cluster_size <= 0x1000, "Cluster size is too large");
+
+        let skip_clusters = offset / cluster_size;
+        let inner_offset = offset % cluster_size;
+
+        let cluster_iter =
+            ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).skip(skip_clusters);
+
+        let buffer_page = Page::alloc();
+        for cluster in cluster_iter {
+            vfs.read_cluster(cluster, &buffer_page)?;
 
-        for data in iter {
-            if buffer.fill(data?)?.should_stop() {
+            let data = unsafe {
+                // SAFETY: We are the only one holding this page.
+                &buffer_page.as_memblk().as_bytes()[inner_offset..]
+            };
+
+            let end = offset + data.len();
+            let real_end = core::cmp::min(end, self.size.load(Ordering::Relaxed) as usize);
+            let real_size = real_end - offset;
+
+            if buffer.fill(&data[..real_size])?.should_stop() {
                 break;
             }
         }
 
         Ok(buffer.wrote())
     }
+
+    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        todo!()
+    }
+
+    fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        todo!()
+    }
+}
+
+impl PageCacheBackend for FileInode {
+    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
+        self.read_direct(page, offset)
+    }
+
+    fn write_page(&self, page: &CachePage, offset: usize) -> KResult<usize> {
+        todo!()
+    }
 }
 
 define_struct_inode! {
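The read_direct path above converts a byte offset into a number of clusters to skip plus an offset inside the first cluster, and clamps each chunk so it never runs past end of file. A small sketch of that arithmetic follows; SECTOR_SIZE matches the constant added in this commit, while the 8-sectors-per-cluster volume and the file size in main are made up for illustration.

const SECTOR_SIZE: usize = 512;

/// Split a file offset into (whole clusters to skip, offset within that cluster).
fn locate(offset: usize, sectors_per_cluster: usize) -> (usize, usize) {
    let cluster_size = sectors_per_cluster * SECTOR_SIZE;
    (offset / cluster_size, offset % cluster_size)
}

/// Clamp a chunk so the read never runs past the end of the file.
fn readable(offset: usize, chunk_len: usize, file_size: usize) -> usize {
    let end = offset + chunk_len;
    end.min(file_size).saturating_sub(offset)
}

fn main() {
    // 8 sectors per cluster -> 4096-byte clusters; offset 9000 lands 808 bytes
    // into the third cluster (clusters 0 and 1 are skipped).
    assert_eq!(locate(9000, 8), (2, 808));

    // The rest of that cluster is 4096 - 808 = 3288 bytes, but only 100 bytes
    // remain before the end of a 9100-byte file.
    assert_eq!(readable(9000, 3288, 9100), 100);
    println!("ok");
}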

+ 42 - 45
src/fs/tmpfs.rs

@@ -1,6 +1,8 @@
 use crate::io::Stream;
 use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR};
+use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend};
 use crate::kernel::timer::Instant;
+use crate::kernel::vfs::inode::InodeData;
 use crate::kernel::vfs::inode::RenameData;
 use crate::{
     io::Buffer,
@@ -17,6 +19,7 @@ use crate::{
 };
 use alloc::sync::{Arc, Weak};
 use core::{ops::ControlFlow, sync::atomic::Ordering};
+use eonix_mm::paging::PAGE_SIZE;
 use eonix_runtime::task::Task;
 use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut};
 use itertools::Itertools;
@@ -58,7 +61,7 @@ impl Inode for NodeInode {
 }
 
 define_struct_inode! {
-    struct DirectoryInode {
+    pub(super) struct DirectoryInode {
         entries: Locked<Vec<(Arc<[u8]>, Ino)>, ()>,
     }
 }
@@ -460,40 +463,50 @@ impl Inode for SymlinkInode {
 }
 
 define_struct_inode! {
-    struct FileInode {
-        filedata: Locked<Vec<u8>, ()>,
+    pub struct FileInode {
+        pages: PageCache,
     }
 }
 
 impl FileInode {
-    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
-        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
-            addr_of_mut_field!(inode, filedata).write(Locked::new(vec![], rwsem));
-
-            addr_of_mut_field!(&mut *inode, mode).write((S_IFREG | (mode & 0o777)).into());
-            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
-            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
+    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
+        Arc::new_cyclic(|weak_self: &Weak<FileInode>| FileInode {
+            idata: {
+                let inode_data = InodeData::new(ino, vfs);
+                inode_data
+                    .mode
+                    .store(S_IFREG | (mode & 0o777), Ordering::Relaxed);
+                inode_data.nlink.store(1, Ordering::Relaxed);
+                inode_data
+            },
+            pages: PageCache::new(weak_self.clone(), 0),
         })
     }
 }
 
+impl PageCacheBackend for FileInode {
+    fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult<usize> {
+        Ok(PAGE_SIZE)
+    }
+
+    fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult<usize> {
+        Ok(PAGE_SIZE)
+    }
+}
+
 impl Inode for FileInode {
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let lock = Task::block_on(self.rwsem.read());
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.pages)
+    }
 
-        match self.filedata.access(lock.prove()).split_at_checked(offset) {
-            Some((_, data)) => buffer.fill(data).map(|result| result.allow_partial()),
-            None => Ok(0),
-        }
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let lock = Task::block_on(self.rwsem.write());
+        Task::block_on(self.pages.read(buffer, offset))
     }
 
     fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
         let lock = Task::block_on(self.rwsem.write());
-        let filedata = self.filedata.access_mut(lock.prove_mut());
 
         let mut store_new_end = None;
         let offset = match offset {
@@ -506,41 +519,25 @@ impl Inode for FileInode {
             }
         };
 
-        let mut pos = offset;
-        loop {
-            if pos >= filedata.len() {
-                filedata.resize(pos + 4096, 0);
-            }
-
-            match stream.poll_data(&mut filedata[pos..])? {
-                Some(data) => pos += data.len(),
-                None => break,
-            }
-        }
+        let wrote = Task::block_on(self.pages.write(stream, offset))?;
+        let cursor_end = offset + wrote;
 
-        filedata.resize(pos, 0);
         if let Some(store_end) = store_new_end {
-            *store_end = pos;
+            *store_end = cursor_end;
         }
 
         // SAFETY: `lock` has done the synchronization
-        self.size.store(pos as u64, Ordering::Relaxed);
         *self.mtime.lock() = Instant::now();
+        self.size.store(cursor_end as u64, Ordering::Relaxed);
 
-        Ok(pos - offset)
+        Ok(wrote)
     }
 
     fn truncate(&self, length: usize) -> KResult<()> {
-        // TODO: We don't need that strong guarantee, find some way to avoid locks
         let lock = Task::block_on(self.rwsem.write());
-        let filedata = self.filedata.access_mut(lock.prove_mut());
-
-        // SAFETY: `lock` has done the synchronization
+        Task::block_on(self.pages.resize(length))?;
         self.size.store(length as u64, Ordering::Relaxed);
         *self.mtime.lock() = Instant::now();
-
-        filedata.resize(length, 0);
-
         Ok(())
     }
 
@@ -559,7 +556,7 @@ impl Inode for FileInode {
 }
 
 impl_any!(TmpFs);
-struct TmpFs {
+pub(super) struct TmpFs {
     next_ino: AtomicIno,
     readonly: bool,
     rename_lock: Mutex<()>,
@@ -580,11 +577,11 @@ impl Vfs for TmpFs {
 }
 
 impl TmpFs {
-    fn assign_ino(&self) -> Ino {
+    pub(super) fn assign_ino(&self) -> Ino {
         self.next_ino.fetch_add(1, Ordering::AcqRel)
     }
 
-    pub fn create(readonly: bool) -> KResult<(Arc<dyn Vfs>, Arc<dyn Inode>)> {
+    pub fn create(readonly: bool) -> KResult<(Arc<TmpFs>, Arc<DirectoryInode>)> {
         let tmpfs = Arc::new(Self {
             next_ino: AtomicIno::new(1),
             readonly,
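The tmpfs write path earlier in this diff delegates the data movement to the page cache and then records the new cursor and file size. A sketch of that bookkeeping is shown below with a hypothetical finish_write helper (not part of the kernel); it grows the size with max, mirroring the fetch_max done inside PageCache::write.

fn finish_write(offset: usize, wrote: usize, size: &mut usize, append_cursor: Option<&mut usize>) -> usize {
    let cursor_end = offset + wrote;
    if let Some(cursor) = append_cursor {
        // Append-style writes (the `store_new_end` case) hand back the new end position.
        *cursor = cursor_end;
    }
    // Writing inside the file must not shrink it; only growth is recorded.
    *size = (*size).max(cursor_end);
    wrote
}

fn main() {
    let mut size = 100;
    let mut cursor = 100;

    // Appending 50 bytes at the end of a 100-byte file.
    let wrote = finish_write(100, 50, &mut size, Some(&mut cursor));
    assert_eq!((wrote, size, cursor), (50, 150, 150));

    // Overwriting 20 bytes at offset 10 leaves the size at 150.
    finish_write(10, 20, &mut size, None);
    assert_eq!(size, 150);
    println!("ok");
}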

+ 2 - 0
src/kernel/mem.rs

@@ -6,9 +6,11 @@ mod allocator;
 mod mm_area;
 mod mm_list;
 mod page_alloc;
+mod page_cache;
 
 pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
+pub use page_cache::{CachePage, PageCache, PageCacheBackend};
 pub use paging::{Page, PageBuffer};

+ 46 - 4
src/kernel/mem/page_alloc/raw_page.rs

@@ -10,8 +10,8 @@ use eonix_mm::{
 use intrusive_list::{container_of, Link};
 use slab_allocator::SlabRawPage;
 
-use crate::kernel::mem::access::RawPageAccess;
-use crate::kernel::mem::PhysAccess;
+use crate::kernel::mem::{access::RawPageAccess, page_cache::PageCacheRawPage, MemoryBlock};
+use crate::kernel::mem::{AsMemoryBlock, PhysAccess};
 
 const PAGE_ARRAY: NonNull<RawPage> =
     unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) };
@@ -32,11 +32,16 @@ impl SlabPageInner {
     }
 }
 
+struct PageCacheInner {
+    valid_size: usize,
+}
+
 pub struct BuddyPageInner {}
 
 enum PageType {
     Buddy(BuddyPageInner),
     Slab(SlabPageInner),
+    PageCache(PageCacheInner),
 }
 
 impl PageType {
@@ -47,6 +52,14 @@ impl PageType {
             unreachable!()
         }
     }
+
+    fn page_cache_data(&mut self) -> &mut PageCacheInner {
+        if let PageType::PageCache(cache_data) = self {
+            return cache_data;
+        } else {
+            unreachable!()
+        }
+    }
 }
 
 pub struct RawPage {
@@ -70,8 +83,8 @@ impl PageFlags {
     pub const PRESENT: u32 = 1 << 0;
     // pub const LOCKED: u32 = 1 << 1;
     pub const BUDDY: u32 = 1 << 2;
-    // pub const SLAB: u32 = 1 << 3;
-    // pub const DIRTY: u32 = 1 << 4;
+    pub const SLAB: u32 = 1 << 3;
+    pub const DIRTY: u32 = 1 << 4;
     pub const FREE: u32 = 1 << 5;
     pub const LOCAL: u32 = 1 << 6;
 
@@ -226,3 +239,32 @@ impl SlabRawPage for RawPagePtr {
         self.as_mut().shared_data = PageType::Slab(SlabPageInner::new(first_free));
     }
 }
+
+impl PageCacheRawPage for RawPagePtr {
+    fn valid_size(&self) -> &mut usize {
+        &mut self.as_mut().shared_data.page_cache_data().valid_size
+    }
+
+    fn is_dirty(&self) -> bool {
+        self.flags().has(PageFlags::DIRTY)
+    }
+
+    fn clear_dirty(&self) {
+        self.flags().clear(PageFlags::DIRTY);
+    }
+
+    fn set_dirty(&self) {
+        self.flags().set(PageFlags::DIRTY);
+    }
+
+    fn cache_init(&self) {
+        self.as_mut().shared_data = PageType::PageCache(PageCacheInner { valid_size: 0 });
+    }
+}
+
+/// SAFETY: `RawPagePtr` is a pointer to a valid `RawPage` struct.
+impl AsMemoryBlock for RawPagePtr {
+    fn as_memblk(&self) -> MemoryBlock {
+        unsafe { MemoryBlock::new(self.real_ptr::<()>().addr(), PAGE_SIZE) }
+    }
+}
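The PageCacheRawPage impl above relies on PageFlags::set, clear, and has for the new DIRTY bit; those helpers are not shown in this diff, so the sketch below assumes a minimal atomic bitmask with the same semantics (the DIRTY constant matches the 1 << 4 value added above).

use std::sync::atomic::{AtomicU32, Ordering};

struct Flags(AtomicU32);

const DIRTY: u32 = 1 << 4;

impl Flags {
    fn set(&self, bit: u32) {
        self.0.fetch_or(bit, Ordering::Relaxed);
    }
    fn clear(&self, bit: u32) {
        self.0.fetch_and(!bit, Ordering::Relaxed);
    }
    fn has(&self, bit: u32) -> bool {
        self.0.load(Ordering::Relaxed) & bit != 0
    }
}

fn main() {
    let flags = Flags(AtomicU32::new(0));
    flags.set(DIRTY);            // a cached write marks the page dirty
    assert!(flags.has(DIRTY));
    flags.clear(DIRTY);          // fsync wrote it back
    assert!(!flags.has(DIRTY));
    println!("ok");
}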

+ 324 - 0
src/kernel/mem/page_cache.rs

@@ -0,0 +1,324 @@
+use super::access::AsMemoryBlock;
+use crate::{
+    io::{Buffer, FillResult, Stream},
+    kernel::mem::page_alloc::RawPagePtr,
+    prelude::KResult,
+    GlobalPageAlloc,
+};
+use align_ext::AlignExt;
+use alloc::{collections::btree_map::BTreeMap, sync::Weak};
+use eonix_log::println_debug;
+
+use core::sync::atomic::{AtomicUsize, Ordering};
+use eonix_mm::paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS};
+use eonix_sync::Mutex;
+
+pub struct PageCache {
+    pages: Mutex<BTreeMap<usize, CachePage>>,
+    size: AtomicUsize,
+    backend: Weak<dyn PageCacheBackend>,
+}
+
+unsafe impl Send for PageCache {}
+unsafe impl Sync for PageCache {}
+
+#[derive(Clone, Copy)]
+pub struct CachePage(RawPagePtr);
+
+impl Buffer for CachePage {
+    fn total(&self) -> usize {
+        PAGE_SIZE
+    }
+
+    fn wrote(&self) -> usize {
+        self.valid_size()
+    }
+
+    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
+        let valid_size = self.valid_size();
+        let available = &mut self.all_mut()[valid_size..];
+        if available.len() == 0 {
+            return Ok(FillResult::Full);
+        }
+
+        let len = core::cmp::min(data.len(), available.len());
+        available[..len].copy_from_slice(&data[..len]);
+
+        *self.0.valid_size() += len;
+
+        if len < data.len() {
+            Ok(FillResult::Partial(len))
+        } else {
+            Ok(FillResult::Done(len))
+        }
+    }
+}
+
+impl CachePage {
+    pub fn new() -> Self {
+        let page = GlobalPageAlloc.alloc().unwrap();
+        page.cache_init();
+        Self(page)
+    }
+
+    pub fn new_zeroed() -> Self {
+        let page = GlobalPageAlloc.alloc().unwrap();
+        // SAFETY: We own the page exclusively, so we can safely zero it.
+        unsafe {
+            page.as_memblk().as_bytes_mut().fill(0);
+        }
+        page.cache_init();
+        Self(page)
+    }
+
+    pub fn valid_size(&self) -> usize {
+        *self.0.valid_size()
+    }
+
+    pub fn set_valid_size(&mut self, valid_size: usize) {
+        *self.0.valid_size() = valid_size;
+    }
+
+    pub fn all(&self) -> &[u8] {
+        unsafe {
+            // SAFETY: The page is exclusively owned by us, so we can safely access its data.
+            self.0.as_memblk().as_bytes()
+        }
+    }
+
+    pub fn all_mut(&mut self) -> &mut [u8] {
+        unsafe {
+            // SAFETY: The page is exclusively owned by us, so we can safely access its data.
+            self.0.as_memblk().as_bytes_mut()
+        }
+    }
+
+    pub fn valid_data(&self) -> &[u8] {
+        &self.all()[..self.valid_size()]
+    }
+
+    pub fn is_dirty(&self) -> bool {
+        self.0.is_dirty()
+    }
+
+    pub fn set_dirty(&self) {
+        self.0.set_dirty();
+    }
+
+    pub fn clear_dirty(&self) {
+        self.0.clear_dirty();
+    }
+}
+
+impl PageCache {
+    pub fn new(backend: Weak<dyn PageCacheBackend>, size: usize) -> Self {
+        Self {
+            pages: Mutex::new(BTreeMap::new()),
+            backend: backend,
+            size: AtomicUsize::new(size),
+        }
+    }
+
+    pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult<usize> {
+        let mut pages = self.pages.lock().await;
+
+        // println_debug!("before pagecache read, {}", buffer.available());
+
+        loop {
+            let page_id = offset >> PAGE_SIZE_BITS;
+            let page = pages.get(&page_id);
+
+            match page {
+                Some(page) => {
+                    let inner_offset = offset % PAGE_SIZE;
+                    // println_debug!(
+                    //     "pagecache try fill data {}",
+                    //     page.valid_data()[inner_offset..].len()
+                    // );
+
+                    // TODO: this still causes unnecessary IO when valid_size < PAGE_SIZE and the fill result is Done.
+                    if page.valid_size() == 0
+                        || buffer
+                            .fill(&page.valid_data()[inner_offset..])?
+                            .should_stop()
+                        || buffer.available() == 0
+                    {
+                        break;
+                    }
+
+                    offset += PAGE_SIZE - inner_offset;
+                }
+                None => {
+                    let mut new_page = CachePage::new();
+                    // println_debug!("page cache try get {}", offset.align_down(PAGE_SIZE));
+                    self.backend
+                        .upgrade()
+                        .unwrap()
+                        .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                    pages.insert(page_id, new_page);
+                }
+            }
+        }
+        // println_debug!("after page cache read{}", buffer.available());
+
+        Ok(buffer.wrote())
+    }
+
+    pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult<usize> {
+        let mut pages = self.pages.lock().await;
+        let old_size = self.size.load(Ordering::Relaxed);
+        let mut wrote = 0;
+
+        loop {
+            let page_id = offset >> PAGE_SIZE_BITS;
+            let page = pages.get_mut(&page_id);
+
+            match page {
+                Some(page) => {
+                    let inner_offset = offset % PAGE_SIZE;
+                    let cursor_end = match stream.poll_data(&mut page.all_mut()[inner_offset..])? {
+                        Some(buf) => {
+                            wrote += buf.len();
+                            inner_offset + buf.len()
+                        }
+                        None => {
+                            break;
+                        }
+                    };
+
+                    if page.valid_size() < cursor_end {
+                        page.set_valid_size(cursor_end);
+                    }
+                    page.set_dirty();
+                    offset += PAGE_SIZE - inner_offset;
+                }
+                None => {
+                    let new_page = if (offset >> PAGE_SIZE_BITS) > (old_size >> PAGE_SIZE_BITS) {
+                        let new_page = CachePage::new_zeroed();
+                        new_page
+                    } else {
+                        let mut new_page = CachePage::new();
+                        self.backend
+                            .upgrade()
+                            .unwrap()
+                            .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                        new_page
+                    };
+
+                    pages.insert(page_id, new_page);
+                }
+            }
+        }
+
+        let cursor_end = offset + wrote;
+        self.size.fetch_max(cursor_end, Ordering::Relaxed);
+        Ok(wrote)
+    }
+
+    pub async fn fsync(&self) -> KResult<()> {
+        let pages = self.pages.lock().await;
+        for (page_id, page) in pages.iter() {
+            if page.is_dirty() {
+                self.backend
+                    .upgrade()
+                    .unwrap()
+                    .write_page(page, page_id << PAGE_SIZE_BITS)?;
+                page.clear_dirty();
+            }
+        }
+        Ok(())
+    }
+
+    // This function is used for extending writes and for truncation.
+    pub async fn resize(&self, new_size: usize) -> KResult<()> {
+        let mut pages = self.pages.lock().await;
+        let old_size = self.size.load(Ordering::Relaxed);
+
+        if new_size < old_size {
+            let begin = new_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS;
+            let end = old_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS;
+
+            for page_id in begin..end {
+                pages.remove(&page_id);
+            }
+        } else if new_size > old_size {
+            let begin = old_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS;
+            let end = new_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS;
+
+            pages.remove(&begin);
+
+            for page_id in begin..end {
+                let mut new_page = CachePage::new_zeroed();
+
+                if page_id != end - 1 {
+                    new_page.set_valid_size(PAGE_SIZE);
+                } else {
+                    new_page.set_valid_size(new_size % PAGE_SIZE);
+                }
+                new_page.set_dirty();
+                pages.insert(page_id, new_page);
+            }
+        }
+
+        self.size.store(new_size, Ordering::Relaxed);
+
+        Ok(())
+    }
+
+    pub async fn get_page(&self, offset: usize) -> KResult<Option<RawPagePtr>> {
+        let offset_aligned = offset.align_down(PAGE_SIZE);
+        let page_id = offset_aligned >> PAGE_SIZE_BITS;
+
+        // println_debug!(
+        //     "pagecache {} {}",
+        //     self.pages.lock().await.len(),
+        //     self.size.load(Ordering::Relaxed)
+        // );
+
+        if offset_aligned > self.size.load(Ordering::Relaxed) {
+            return Ok(None);
+        }
+
+        let mut pages = self.pages.lock().await;
+
+        if let Some(page) = pages.get(&page_id) {
+            Ok(Some(page.0))
+        } else {
+            let mut new_page = CachePage::new();
+            self.backend
+                .upgrade()
+                .unwrap()
+                .read_page(&mut new_page, offset_aligned)?;
+            pages.insert(page_id, new_page);
+            Ok(Some(new_page.0))
+        }
+    }
+}
+
+// With this trait, "page cache" and "block cache" are unified:
+// for filesystems, `offset` is the file offset (floor-aligned to PAGE_SIZE);
+// for block devices, `offset` is the block index (floor-aligned to PAGE_SIZE / BLK_SIZE).
+// Note: this unification can still create unnecessary cache entries.
+pub trait PageCacheBackend {
+    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
+
+    fn write_page(&self, page: &CachePage, offset: usize) -> KResult<usize>;
+}
+
+pub trait PageCacheRawPage: RawPage {
+    fn valid_size(&self) -> &mut usize;
+
+    fn is_dirty(&self) -> bool;
+
+    fn set_dirty(&self);
+
+    fn clear_dirty(&self);
+
+    fn cache_init(&self);
+}
+
+impl Drop for PageCache {
+    fn drop(&mut self) {
+        let _ = self.fsync();
+    }
+}
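To see the read path above in one piece: pages are keyed by page index (offset >> PAGE_SIZE_BITS), a miss faults the whole page in through the backend, and a hit copies out of the page's valid prefix. The following is a simplified, synchronous, std-only model of that design, not the kernel code; MiniCache, MiniBackend, and VecBackend are illustrative names.

use std::collections::BTreeMap;

const PAGE_SIZE: usize = 4096;
const PAGE_SIZE_BITS: usize = 12;

trait MiniBackend {
    /// Fill `page` from storage at `offset`; return how many bytes are valid.
    fn read_page(&self, page: &mut [u8; PAGE_SIZE], offset: usize) -> usize;
}

// A backend over an in-memory "disk" image, standing in for real block IO.
struct VecBackend(Vec<u8>);

impl MiniBackend for VecBackend {
    fn read_page(&self, page: &mut [u8; PAGE_SIZE], offset: usize) -> usize {
        let end = self.0.len().min(offset + PAGE_SIZE);
        if offset >= end {
            return 0;
        }
        page[..end - offset].copy_from_slice(&self.0[offset..end]);
        end - offset
    }
}

struct MiniCache<'a> {
    // Pages keyed by page index, each carrying its data and its valid length.
    pages: BTreeMap<usize, ([u8; PAGE_SIZE], usize)>,
    backend: &'a dyn MiniBackend,
}

impl MiniCache<'_> {
    fn read(&mut self, buf: &mut [u8], mut offset: usize) -> usize {
        let backend = self.backend;
        let mut filled = 0;
        while filled < buf.len() {
            let page_id = offset >> PAGE_SIZE_BITS;
            // Cache miss: fault the whole page in from the backend.
            let (data, valid) = self.pages.entry(page_id).or_insert_with(|| {
                let mut page = [0u8; PAGE_SIZE];
                let valid = backend.read_page(&mut page, page_id << PAGE_SIZE_BITS);
                (page, valid)
            });

            let inner = offset % PAGE_SIZE;
            if inner >= *valid {
                break; // past the cached end of the file
            }
            let n = (buf.len() - filled).min(*valid - inner);
            buf[filled..filled + n].copy_from_slice(&data[inner..inner + n]);
            filled += n;
            offset += n;
            if inner + n < PAGE_SIZE {
                break; // short page: end of file reached
            }
        }
        filled
    }
}

fn main() {
    let disk = VecBackend((0..10_000u32).map(|i| (i % 251) as u8).collect());
    let mut cache = MiniCache { pages: BTreeMap::new(), backend: &disk };

    let mut buf = [0u8; 1000];
    // A read crossing the 4096-byte boundary faults pages 0 and 1 in.
    assert_eq!(cache.read(&mut buf, 4000), 1000);
    assert_eq!(cache.pages.len(), 2);

    // A read near the end of the 10_000-byte "file" is truncated at EOF.
    assert_eq!(cache.read(&mut buf, 9_500), 500);
    println!("ok");
}

The real PageCache layers write support, dirty tracking for fsync, and resize on truncate on top of the same keyed-by-page-index map.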

+ 17 - 5
src/kernel/vfs/inode.rs

@@ -4,6 +4,7 @@ use crate::kernel::constants::{
     EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO,
     STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT,
 };
+use crate::kernel::mem::PageCache;
 use crate::kernel::timer::Instant;
 use crate::{io::Buffer, prelude::*};
 use alloc::sync::{Arc, Weak};
@@ -15,7 +16,6 @@ use core::{
 };
 use eonix_runtime::task::Task;
 use eonix_sync::RwLock;
-use posix_types::namei::RenameFlags;
 use posix_types::stat::StatX;
 
 pub type Ino = u64;
@@ -54,13 +54,13 @@ pub struct InodeData {
 }
 
 impl InodeData {
-    pub const fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
+    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
         Self {
             ino,
             vfs,
-            atime: Spin::new(Instant::default()),
-            ctime: Spin::new(Instant::default()),
-            mtime: Spin::new(Instant::default()),
+            atime: Spin::new(Instant::now()),
+            ctime: Spin::new(Instant::now()),
+            mtime: Spin::new(Instant::now()),
             rwsem: RwLock::new(()),
             size: AtomicU64::new(0),
             nlink: AtomicNlink::new(0),
@@ -127,10 +127,18 @@ pub trait Inode: Send + Sync + InodeInner + Any {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
+    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
+    }
+
     fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
+    fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
+    }
+
     fn devid(&self) -> KResult<DevId> {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
@@ -163,6 +171,10 @@ pub trait Inode: Send + Sync + InodeInner + Any {
         Err(EPERM)
     }
 
+    fn page_cache(&self) -> Option<&PageCache> {
+        None
+    }
+
     fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
         // Safety: ffi should have checked reference
         let vfs = self.vfs.upgrade().expect("Vfs is dropped");
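The new page_cache(), read_direct(), and write_direct() hooks default to None and EINVAL, so inodes that predate the cache are unaffected, while cached filesystems override read() to go through the cache and keep read_direct() as the raw path the cache's backend calls. As a rough illustration of the shape this gives callers: in this commit each filesystem overrides read() itself, so the blanket dispatch below is only a sketch with stand-in types (Node, Cache), not the kernel's Inode trait.

struct Cache;

impl Cache {
    fn read(&self, buf: &mut [u8], _offset: usize) -> usize {
        buf.fill(b'c'); // pretend the data came from cached pages
        buf.len()
    }
}

trait Node {
    fn page_cache(&self) -> Option<&Cache> {
        None // default: no cache, like the default in the Inode trait
    }
    fn read_direct(&self, buf: &mut [u8], offset: usize) -> usize;

    fn read(&self, buf: &mut [u8], offset: usize) -> usize {
        match self.page_cache() {
            Some(cache) => cache.read(buf, offset),
            None => self.read_direct(buf, offset),
        }
    }
}

struct Cached(Cache);
struct Uncached;

impl Node for Cached {
    fn page_cache(&self) -> Option<&Cache> {
        Some(&self.0)
    }
    fn read_direct(&self, buf: &mut [u8], _offset: usize) -> usize {
        buf.fill(b'd');
        buf.len()
    }
}

impl Node for Uncached {
    fn read_direct(&self, buf: &mut [u8], _offset: usize) -> usize {
        buf.fill(b'd');
        buf.len()
    }
}

fn main() {
    let mut buf = [0u8; 4];
    Cached(Cache).read(&mut buf, 0);
    assert_eq!(&buf, b"cccc"); // served from the cache
    Uncached.read(&mut buf, 0);
    assert_eq!(&buf, b"dddd"); // fell back to direct IO
    println!("ok");
}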