Просмотр исходного кода

Merge remote-tracking branch 'SMS-Derfflinger/ext4-replace' into task-rework

greatbridf 5 месяцев назад
Родитель
Коммит
c57b71ff95
10 измененных файлов с 634 добавлено и 77 удалено
  1. 10 11
      Cargo.lock
  2. 7 2
      Cargo.toml
  3. 17 1
      src/driver/virtio/virtio_blk.rs
  4. 438 48
      src/fs/ext4.rs
  5. 3 3
      src/fs/fat32.rs
  6. 2 2
      src/fs/tmpfs.rs
  7. 97 0
      src/kernel/block.rs
  8. 1 1
      src/kernel/mem.rs
  9. 58 8
      src/kernel/mem/page_cache.rs
  10. 1 1
      src/kernel/vfs/inode.rs

+ 10 - 11
Cargo.lock

@@ -19,6 +19,15 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c1c330e503236d0b06386ae6cc42a513ef1ccc23c52b603c1b52f018564faf44"
 
+[[package]]
+name = "another_ext4"
+version = "0.1.0"
+source = "git+https://github.com/SMS-Derfflinger/another_ext4?branch=main#ed6d91718db721eb4a744483c289cc44a6f34bf4"
+dependencies = [
+ "bitflags",
+ "log",
+]
+
 [[package]]
 name = "atomic_unique_refcell"
 version = "0.1.0"
@@ -134,6 +143,7 @@ version = "0.1.0"
 dependencies = [
  "acpi",
  "align_ext",
+ "another_ext4",
  "atomic_unique_refcell",
  "bitflags",
  "buddy_allocator",
@@ -145,7 +155,6 @@ dependencies = [
  "eonix_preempt",
  "eonix_runtime",
  "eonix_sync",
- "ext4_rs",
  "intrusive-collections 0.9.8",
  "intrusive_list",
  "itertools",
@@ -250,16 +259,6 @@ dependencies = [
  "intrusive-collections 0.9.7",
 ]
 
-[[package]]
-name = "ext4_rs"
-version = "1.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a1a97344bde15b0ace15e265dab27228d4bdc37a0bfa8548c5645d7cfa6a144"
-dependencies = [
- "bitflags",
- "log",
-]
-
 [[package]]
 name = "fdt"
 version = "0.1.5"

+ 7 - 2
Cargo.toml

@@ -30,7 +30,7 @@ itertools = { version = "0.13.0", default-features = false }
 acpi = "5.2.0"
 align_ext = "0.1.0"
 xmas-elf = "0.10.0"
-ext4_rs = "1.3.2"
+another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch = "main" }
 stalloc = { version = "0.6.1", default-features = false, features = [
     "allocator-api",
 ] }
@@ -39,7 +39,12 @@ stalloc = { version = "0.6.1", default-features = false, features = [
 virtio-drivers = { version = "0.11.0" }
 
 [target.'cfg(target_arch = "riscv64")'.dependencies]
-unwinding = { version = "0.2.8", default-features = false, features = ["unwinder", "fde-static", "personality", "panic"] }
+unwinding = { version = "0.2.8", default-features = false, features = [
+    "unwinder",
+    "fde-static",
+    "personality",
+    "panic",
+] }
 
 [features]
 default = []

+ 17 - 1
src/driver/virtio/virtio_blk.rs

@@ -84,7 +84,23 @@ where
 
     fn submit(&self, req: BlockDeviceRequest) -> KResult<()> {
         match req {
-            BlockDeviceRequest::Write { .. } => todo!(),
+            BlockDeviceRequest::Write {
+                sector,
+                count,
+                buffer,
+            } => {
+                let mut dev = self.lock();
+                for ((start, len), buffer_page) in
+                    Chunks::new(sector as usize, count as usize, 8).zip(buffer.iter())
+                {
+                    let buffer = unsafe {
+                        // SAFETY: Pages in `req.buffer` are guaranteed to be exclusively owned by us.
+                        &buffer_page.as_memblk().as_bytes()[..len as usize * 512]
+                    };
+
+                    dev.write_blocks(start, buffer).map_err(|_| EIO)?;
+                }
+            }
             BlockDeviceRequest::Read {
                 sector,
                 count,

+ 438 - 48
src/fs/ext4.rs

@@ -1,17 +1,20 @@
 use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 
-use crate::kernel::mem::{PageCache, PageCacheBackend};
+use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend};
 use crate::kernel::task::block_on;
+use crate::kernel::timer::Ticks;
 use crate::kernel::vfs::inode::{AtomicMode, Mode};
 use crate::{
-    io::{Buffer, ByteBuffer},
+    io::{Buffer, ByteBuffer, Stream},
     kernel::{
         block::BlockDevice,
-        constants::EIO,
+        constants::{EEXIST, EINVAL, EIO, ENOSYS},
         timer::Instant,
         vfs::{
-            dentry::Dentry,
-            inode::{define_struct_inode, AtomicNlink, Ino, Inode, InodeData},
+            dentry::{dcache, Dentry},
+            inode::{
+                define_struct_inode, AtomicNlink, Ino, Inode, InodeData, RenameData, WriteOffset,
+            },
             mount::{register_filesystem, Mount, MountCreator},
             vfs::Vfs,
             DevId, FsContext,
@@ -25,9 +28,10 @@ use alloc::{
     collections::btree_map::{BTreeMap, Entry},
     sync::Arc,
 };
+use another_ext4::{
+    Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId,
+};
 use eonix_sync::RwLock;
-use ext4_rs::{BlockDevice as Ext4BlockDeviceTrait, Ext4Error};
-use ext4_rs::{Errno, Ext4};
 
 pub struct Ext4BlockDevice {
     device: Arc<BlockDevice>,
@@ -40,20 +44,25 @@ impl Ext4BlockDevice {
 }
 
 impl Ext4BlockDeviceTrait for Ext4BlockDevice {
-    fn read_offset(&self, offset: usize) -> Vec<u8> {
-        let mut buffer = vec![0u8; 4096];
+    fn read_block(&self, block_id: PBlockId) -> Block {
+        let mut buffer = [0u8; 4096];
         let mut byte_buffer = ByteBuffer::new(buffer.as_mut_slice());
 
         let _ = self
             .device
-            .read_some(offset, &mut byte_buffer)
+            .read_some((block_id as usize) * 4096, &mut byte_buffer)
             .expect("Failed to read from block device");
 
-        buffer
+        Block {
+            id: block_id,
+            data: buffer,
+        }
     }
 
-    fn write_offset(&self, _offset: usize, _data: &[u8]) {
-        todo!()
+    fn write_block(&self, block: &another_ext4::Block) {
+        let _ = self
+            .device
+            .write_some((block.id as usize) * 4096, &block.data);
     }
 }
 
@@ -74,7 +83,7 @@ impl Vfs for Ext4Fs {
     }
 
     fn is_read_only(&self) -> bool {
-        true
+        false
     }
 }
 
@@ -83,6 +92,43 @@ impl Ext4Fs {
         icache.get(&ino).cloned().map(Ext4Inode::into_inner)
     }
 
+    fn modify_inode_stat(&self, ino: u32, size: Option<u64>, mtime: u32) {
+        let _ = self
+            .inner
+            .setattr(ino, None, None, None, size, None, Some(mtime), None, None);
+    }
+
+    fn create_inode_stat(&self, parent: u32, child: u32, mtime: u32) {
+        let _ = self.inner.setattr(
+            parent,
+            None,
+            None,
+            None,
+            None,
+            None,
+            Some(mtime),
+            None,
+            None,
+        );
+        let _ = self
+            .inner
+            .setattr(child, None, None, None, None, None, Some(mtime), None, None);
+    }
+
+    fn chmod_stat(&self, ino: u32, new_mode: u16, ctime: u32) {
+        let _ = self.inner.setattr(
+            ino,
+            Some(InodeMode::from_bits_retain(new_mode.try_into().unwrap())),
+            None,
+            None,
+            None,
+            None,
+            None,
+            Some(ctime),
+            None,
+        );
+    }
+
     fn get_or_insert(
         &self,
         icache: &mut BTreeMap<Ino, Ext4Inode>,
@@ -92,7 +138,7 @@ impl Ext4Fs {
             Entry::Occupied(occupied) => occupied.get().clone().into_inner(),
             Entry::Vacant(vacant) => match idata.mode.load().format() {
                 Mode::REG => vacant
-                    .insert(Ext4Inode::File(FileInode::new(idata)))
+                    .insert(Ext4Inode::File(FileInode::with_idata(idata)))
                     .clone()
                     .into_inner(),
                 Mode::DIR => vacant
@@ -102,7 +148,7 @@ impl Ext4Fs {
                 mode => {
                     println_warn!("ext4: Unsupported inode type: {:#o}", mode.format_bits());
                     vacant
-                        .insert(Ext4Inode::File(FileInode::new(idata)))
+                        .insert(Ext4Inode::File(FileInode::with_idata(idata)))
                         .clone()
                         .into_inner()
                 }
@@ -114,7 +160,7 @@ impl Ext4Fs {
 impl Ext4Fs {
     pub fn create(device: Arc<BlockDevice>) -> KResult<(Arc<Self>, Arc<dyn Inode>)> {
         let ext4_device = Ext4BlockDevice::new(device.clone());
-        let ext4 = Ext4::open(Arc::new(ext4_device));
+        let ext4 = Ext4::load(Arc::new(ext4_device)).unwrap();
 
         let ext4fs = Arc::new(Self {
             inner: ext4,
@@ -124,28 +170,28 @@ impl Ext4Fs {
 
         let root_inode = {
             let mut icache = block_on(ext4fs.icache.write());
-            let root_inode = ext4fs.inner.get_inode_ref(2);
+            let root_inode = ext4fs.inner.read_root_inode();
 
             ext4fs.get_or_insert(
                 &mut icache,
                 InodeData {
-                    ino: root_inode.inode_num as Ino,
+                    ino: root_inode.id as Ino,
                     size: AtomicU64::new(root_inode.inode.size()),
-                    nlink: AtomicNlink::new(root_inode.inode.links_count() as _),
+                    nlink: AtomicNlink::new(root_inode.inode.link_count() as _),
                     uid: AtomicU32::new(root_inode.inode.uid() as _),
                     gid: AtomicU32::new(root_inode.inode.gid() as _),
-                    mode: AtomicMode::new(root_inode.inode.mode() as _),
+                    mode: AtomicMode::new(root_inode.inode.mode().bits() as _),
                     atime: Spin::new(Instant::new(
                         root_inode.inode.atime() as _,
-                        root_inode.inode.i_atime_extra() as _,
+                        root_inode.inode.atime_extra() as _,
                     )),
                     ctime: Spin::new(Instant::new(
                         root_inode.inode.ctime() as _,
-                        root_inode.inode.i_ctime_extra() as _,
+                        root_inode.inode.ctime_extra() as _,
                     )),
                     mtime: Spin::new(Instant::new(
                         root_inode.inode.mtime() as _,
-                        root_inode.inode.i_mtime_extra() as _,
+                        root_inode.inode.mtime_extra() as _,
                     )),
                     rwsem: RwLock::new(()),
                     vfs: Arc::downgrade(&ext4fs) as _,
@@ -174,6 +220,7 @@ impl Ext4Inode {
 
 define_struct_inode! {
     struct FileInode {
+        last_sync: AtomicU64,
         page_cache: PageCache,
     }
 }
@@ -183,23 +230,49 @@ define_struct_inode! {
 }
 
 impl FileInode {
-    fn new(idata: InodeData) -> Arc<Self> {
+    fn with_idata(idata: InodeData) -> Arc<Self> {
         let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
             idata,
+            last_sync: AtomicU64::new(0),
             page_cache: PageCache::new(weak_self.clone()),
         });
 
         inode
     }
+
+    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
+        Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
+            idata: {
+                let inode_data = InodeData::new(ino, vfs);
+                inode_data.mode.store(Mode::REG.perm(mode.bits()));
+                inode_data.nlink.store(1, Ordering::Relaxed);
+                inode_data
+            },
+            last_sync: AtomicU64::new(0),
+            page_cache: PageCache::new(weak_self.clone()),
+        })
+    }
+
+    fn sync_if_needed(&self) {
+        let now = Ticks::now().in_secs();
+        let last = self.last_sync.load(Ordering::Relaxed);
+
+        // TODO: this is a temporary implementation;
+        // consider replacing it with an update strategy such as LRU in the future.
+        if now - last > 10 {
+            self.last_sync.store(now, Ordering::Relaxed);
+            let _ = block_on(self.page_cache.fsync());
+        }
+    }
 }
 
 impl PageCacheBackend for FileInode {
-    fn read_page(&self, page: &mut crate::kernel::mem::CachePage, offset: usize) -> KResult<usize> {
+    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
         self.read_direct(page, offset)
     }
 
-    fn write_page(&self, _page: &crate::kernel::mem::CachePage, _offset: usize) -> KResult<usize> {
-        todo!()
+    fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize> {
+        self.write_direct(page, offset)
     }
 
     fn size(&self) -> usize {
@@ -221,13 +294,135 @@ impl Inode for FileInode {
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 
         let mut temp_buf = vec![0u8; buffer.total()];
-        match ext4fs.inner.read_at(self.ino as u32, offset, &mut temp_buf) {
+        match ext4fs.inner.read(self.ino as u32, offset, &mut temp_buf) {
             Ok(bytes_read) => {
                 let _ = buffer.fill(&temp_buf[..bytes_read])?;
                 Ok(buffer.wrote())
             }
-            Err(e) => Err(e.error() as u32),
+            Err(e) => Err(e.code() as u32),
+        }
+    }
+
+    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        let _lock = block_on(self.rwsem.write());
+
+        let mut store_new_end = None;
+        let offset = match offset {
+            WriteOffset::Position(offset) => offset,
+            // TODO: some additional handling still needs to be added here
+            WriteOffset::End(end) => {
+                store_new_end = Some(end);
+                self.size.load(Ordering::Relaxed) as usize
+            }
+        };
+
+        let total_written = block_on(self.page_cache.write(stream, offset))?;
+        let cursor_end = offset + total_written;
+        if let Some(store_end) = store_new_end {
+            *store_end = cursor_end;
+        }
+
+        let mtime = Instant::now();
+        *self.mtime.lock() = mtime;
+        self.size.store(cursor_end as u64, Ordering::Relaxed);
+
+        self.sync_if_needed();
+
+        Ok(total_written)
+    }
+
+    fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
+        //let _lock = Task::block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let mut temp_buf = vec![0u8; 4096];
+        let mut total_written = 0;
+
+        while let Some(data) = stream.poll_data(&mut temp_buf)? {
+            let written = ext4fs
+                .inner
+                .write(self.ino as u32, offset + total_written, data)
+                .unwrap();
+            total_written += written;
+            if written < data.len() {
+                break;
+            }
         }
+
+        ext4fs.modify_inode_stat(
+            self.ino as u32,
+            Some(self.size() as u64),
+            self.mtime.lock().since_epoch().as_secs() as u32,
+        );
+
+        Ok(total_written)
+    }
+
+    fn chmod(&self, mode: Mode) -> KResult<()> {
+        let _lock = block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+        let old_mode = self.mode.load();
+        let new_mode = old_mode.perm(mode.bits());
+
+        let now = Instant::now();
+        ext4fs.chmod_stat(
+            self.ino as u32,
+            new_mode.bits() as u16,
+            now.since_epoch().as_secs() as u32,
+        );
+
+        // SAFETY: `rwsem` has done the synchronization
+        self.mode.store(new_mode);
+        *self.ctime.lock() = now;
+
+        Ok(())
+    }
+
+    // TODO
+    fn truncate(&self, _length: usize) -> KResult<()> {
+        Ok(())
+    }
+}
+
+impl DirInode {
+    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
+        Arc::new_cyclic(|_| DirInode {
+            idata: {
+                let inode_data = InodeData::new(ino, vfs);
+                inode_data.mode.store(Mode::DIR.perm(mode.bits()));
+                inode_data.nlink.store(2, Ordering::Relaxed);
+                inode_data.size.store(4096, Ordering::Relaxed);
+                inode_data
+            },
+        })
+    }
+
+    fn update_time(&self, time: Instant) {
+        *self.ctime.lock() = time;
+        *self.mtime.lock() = time;
+    }
+
+    fn update_child_time(&self, child: &dyn Inode, time: Instant) {
+        self.update_time(time);
+        *child.ctime.lock() = time;
+        *child.mtime.lock() = time;
+    }
+
+    fn link_file(&self) {
+        self.size.fetch_add(1, Ordering::Relaxed);
+    }
+
+    fn link_dir(&self) {
+        self.nlink.fetch_add(1, Ordering::Relaxed);
+        self.size.fetch_add(1, Ordering::Relaxed);
+    }
+
+    fn unlink_dir(&self) {
+        self.nlink.fetch_sub(1, Ordering::Relaxed);
     }
 }
 
@@ -238,13 +433,14 @@ impl Inode for DirInode {
 
         let name = dentry.get_name();
         let name = String::from_utf8_lossy(&name);
-        let lookup_result = ext4fs.inner.fuse_lookup(self.ino, &name);
+        let lookup_result = ext4fs.inner.lookup(self.ino as u32, &name);
 
-        const EXT4_ERROR_ENOENT: Ext4Error = Ext4Error::new(Errno::ENOENT);
+        // TODO: restore the ENOENT => `Ok(None)` handling that the commented-out code below used to provide
+        //const EXT4_ERROR_ENOENT: Ext4Error_ = Ext4Error_::new(ErrCode::ENOENT);
         let attr = match lookup_result {
-            Ok(attr) => attr,
-            Err(EXT4_ERROR_ENOENT) => return Ok(None),
-            Err(error) => return Err(error.error() as u32),
+            Ok(inode_id) => ext4fs.inner.getattr(inode_id).unwrap(),
+            //Err(EXT4_ERROR_ENOENT) => return Ok(None),
+            Err(error) => return Err(error.code() as u32),
         };
 
         // Fast path: if the inode is already in the cache, return it.
@@ -252,9 +448,19 @@ impl Inode for DirInode {
             return Ok(Some(inode));
         }
 
-        let extra_perm = attr.perm.bits() as u32 & 0o7000;
-        let perm = attr.perm.bits() as u32 & 0o0700;
-        let real_perm = extra_perm | perm | perm >> 3 | perm >> 6;
+        let file_type_bits = match attr.ftype {
+            FileType::RegularFile => InodeMode::FILE.bits(),
+            FileType::Directory => InodeMode::DIRECTORY.bits(),
+            FileType::CharacterDev => InodeMode::CHARDEV.bits(),
+            FileType::BlockDev => InodeMode::BLOCKDEV.bits(),
+            FileType::Fifo => InodeMode::FIFO.bits(),
+            FileType::Socket => InodeMode::SOCKET.bits(),
+            FileType::SymLink => InodeMode::SOFTLINK.bits(),
+            FileType::Unknown => 0,
+        };
+
+        let perm_bits = attr.perm.bits() & InodeMode::PERM_MASK.bits();
+        let mode = file_type_bits | perm_bits;
 
         // Create a new inode based on the attributes.
         let mut icache = block_on(ext4fs.icache.write());
@@ -263,10 +469,10 @@ impl Inode for DirInode {
             InodeData {
                 ino: attr.ino as Ino,
                 size: AtomicU64::new(attr.size),
-                nlink: AtomicNlink::new(attr.nlink as _),
+                nlink: AtomicNlink::new(attr.links as _),
                 uid: AtomicU32::new(attr.uid),
                 gid: AtomicU32::new(attr.gid),
-                mode: AtomicMode::new(attr.kind.bits() as u32 | real_perm),
+                mode: AtomicMode::new(mode as _),
                 atime: Spin::new(Instant::new(attr.atime as _, 0)),
                 ctime: Spin::new(Instant::new(attr.ctime as _, 0)),
                 mtime: Spin::new(Instant::new(attr.mtime as _, 0)),
@@ -288,22 +494,206 @@ impl Inode for DirInode {
 
         let entries = ext4fs
             .inner
-            .fuse_readdir(self.ino as u64, 0, offset as i64)
-            .map_err(|err| err.error() as u32)?;
-        let mut current_offset = 0;
+            .listdir(self.ino as u32)
+            .map_err(|err| err.code() as u32)?;
 
-        for entry in entries {
-            let name_len = entry.name_len as usize;
-            let name = &entry.name[..name_len];
+        let entries_to_process = if offset < entries.len() {
+            &entries[offset..]
+        } else {
+            &entries[0..0]
+        };
+        let mut current_offset = 0;
+        for entry in entries_to_process {
+            let name_string = entry.name();
+            let name = name_string.as_bytes();
+            let inode = entry.inode() as Ino;
 
-            if callback(name, entry.inode as Ino)?.is_break() {
+            if callback(name, inode)?.is_break() {
                 break;
             }
-
             current_offset += 1;
         }
         Ok(current_offset)
     }
+
+    fn creat(&self, at: &Arc<Dentry>, mode: Mode) -> KResult<()> {
+        let _lock = block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let name = at.get_name();
+        let name = String::from_utf8_lossy(&name);
+
+        let new_ino = ext4fs
+            .inner
+            .create(
+                self.ino as u32,
+                &name,
+                InodeMode::from_bits_retain(Mode::REG.perm(mode.bits()).bits() as u16),
+            )
+            .unwrap();
+
+        let file = FileInode::new(new_ino as u64, self.vfs.clone(), mode);
+        let now = Instant::now();
+        self.update_child_time(file.as_ref(), now);
+        self.link_file();
+
+        ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32);
+
+        at.save_reg(file)
+    }
+
+    fn mkdir(&self, at: &Dentry, mode: Mode) -> KResult<()> {
+        let _lock = block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let name = at.get_name();
+        let name = String::from_utf8_lossy(&name);
+
+        let new_ino = ext4fs
+            .inner
+            .mkdir(
+                self.ino as u32,
+                &name,
+                InodeMode::from(Mode::DIR.perm(mode.bits())),
+            )
+            .unwrap();
+
+        let new_dir = DirInode::new(new_ino as u64, self.vfs.clone(), mode);
+        let now = Instant::now();
+        self.update_child_time(new_dir.as_ref(), now);
+        self.link_dir();
+
+        ext4fs.create_inode_stat(self.ino as u32, new_ino, now.since_epoch().as_secs() as u32);
+
+        at.save_dir(new_dir)
+    }
+
+    fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
+        let _dir_lock = block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let file = at.get_inode()?;
+
+        let name = at.get_name();
+        let name = String::from_utf8_lossy(&name);
+        let _file_lock = block_on(file.rwsem.write());
+
+        if file.is_dir() {
+            let _ = ext4fs.inner.rmdir(self.ino as u32, &name);
+            self.unlink_dir();
+        } else {
+            let _ = ext4fs.inner.unlink(self.ino as u32, &name);
+        }
+        let now = Instant::now();
+        self.update_time(now);
+        ext4fs.modify_inode_stat(self.ino as u32, None, now.since_epoch().as_secs() as u32);
+
+        dcache::d_remove(at);
+
+        Ok(())
+    }
+
+    fn chmod(&self, mode: Mode) -> KResult<()> {
+        let _lock = block_on(self.rwsem.write());
+
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+        let old_mode = self.mode.load();
+        let new_mode = old_mode.perm(mode.bits());
+
+        let now = Instant::now();
+        ext4fs.chmod_stat(
+            self.ino as u32,
+            new_mode.non_format_bits() as _,
+            now.since_epoch().as_secs() as u32,
+        );
+
+        // SAFETY: `rwsem` has done the synchronization
+        self.mode.store(new_mode);
+        *self.ctime.lock() = now;
+
+        Ok(())
+    }
+
+    fn rename(&self, rename_data: RenameData) -> KResult<()> {
+        let RenameData {
+            old_dentry,
+            new_dentry,
+            new_parent,
+            is_exchange,
+            no_replace,
+            ..
+        } = rename_data;
+
+        if is_exchange {
+            println_warn!("Ext4Fs does not support exchange rename for now");
+            return Err(ENOSYS);
+        }
+
+        // TODO: may need another lock
+        let _lock = block_on(self.rwsem.write());
+        let vfs = self.vfs.upgrade().ok_or(EIO)?;
+        let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
+
+        let old_file = old_dentry.get_inode()?;
+        let new_file = new_dentry.get_inode();
+        if no_replace && new_file.is_ok() {
+            return Err(EEXIST);
+        }
+
+        let name = old_dentry.name();
+        let name = core::str::from_utf8(&*name).map_err(|_| EINVAL)?;
+        let new_name = new_dentry.name();
+        let new_name = core::str::from_utf8(&*new_name).map_err(|_| EINVAL)?;
+
+        ext4fs
+            .inner
+            .rename(self.ino as u32, name, new_parent.ino as u32, new_name)
+            .map_err(|err| err.code() as u32)?;
+
+        // TODO: may need more operations
+        let now = Instant::now();
+        *old_file.ctime.lock() = now;
+        *self.mtime.lock() = now;
+
+        let same_parent = Arc::as_ptr(&new_parent) == &raw const *self;
+        if !same_parent {
+            *new_parent.mtime.lock() = now;
+            if old_file.is_dir() {
+                self.nlink.fetch_sub(1, Ordering::Relaxed);
+                new_parent.nlink.fetch_add(1, Ordering::Relaxed);
+            }
+        }
+
+        if let Ok(replaced_file) = new_dentry.get_inode() {
+            if !no_replace {
+                *replaced_file.ctime.lock() = now;
+                replaced_file.nlink.fetch_sub(1, Ordering::Relaxed);
+            }
+        }
+
+        block_on(dcache::d_exchange(old_dentry, new_dentry));
+
+        Ok(())
+    }
+}
+
+impl From<Mode> for InodeMode {
+    fn from(value: Mode) -> Self {
+        InodeMode::from_bits_retain(value.bits() as u16)
+    }
+}
+
+impl From<InodeMode> for Mode {
+    fn from(value: InodeMode) -> Self {
+        Mode::new(value.bits() as u32)
+    }
 }
 
 struct Ext4MountCreator;

+ 3 - 3
src/fs/fat32.rs

@@ -3,7 +3,7 @@ mod file;
 
 use crate::io::Stream;
 use crate::kernel::constants::EIO;
-use crate::kernel::mem::AsMemoryBlock;
+use crate::kernel::mem::{AsMemoryBlock, CachePageStream};
 use crate::kernel::task::block_on;
 use crate::kernel::vfs::inode::{Mode, WriteOffset};
 use crate::{
@@ -311,7 +311,7 @@ impl Inode for FileInode {
         todo!()
     }
 
-    fn write_direct(&self, _stream: &mut dyn Stream, _offset: WriteOffset) -> KResult<usize> {
+    fn write_direct(&self, _stream: &mut dyn Stream, _offset: usize) -> KResult<usize> {
         todo!()
     }
 }
@@ -321,7 +321,7 @@ impl PageCacheBackend for FileInode {
         self.read_direct(page, offset)
     }
 
-    fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult<usize> {
+    fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
         todo!()
     }
 

+ 2 - 2
src/fs/tmpfs.rs

@@ -1,6 +1,6 @@
 use crate::io::Stream;
 use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR};
-use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend};
+use crate::kernel::mem::{CachePage, CachePageStream, PageCache, PageCacheBackend};
 use crate::kernel::task::block_on;
 use crate::kernel::timer::Instant;
 use crate::kernel::vfs::inode::RenameData;
@@ -485,7 +485,7 @@ impl PageCacheBackend for FileInode {
         Ok(PAGE_SIZE)
     }
 
-    fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult<usize> {
+    fn write_page(&self, _page: &mut CachePageStream, _offset: usize) -> KResult<usize> {
         Ok(PAGE_SIZE)
     }
 

+ 97 - 0
src/kernel/block.rs

@@ -270,6 +270,103 @@ impl BlockDevice {
             Ok(FillResult::Partial(nfilled))
         }
     }
+
+    /// Write some data to the block device; this may involve copying and fragmentation.
+    ///
+    /// # Arguments
+    /// `offset` - offset in bytes
+    /// `data` - data to write
+    ///
+    pub fn write_some(&self, offset: usize, data: &[u8]) -> KResult<usize> {
+        let mut sector_start = offset as u64 / 512;
+        let mut first_sector_offset = offset as u64 % 512;
+        let mut remaining_data = data;
+        let mut nwritten = 0;
+
+        while !remaining_data.is_empty() {
+            let pages: &[Page];
+            let page: Option<Page>;
+            let page_vec: Option<Vec<Page>>;
+
+            // Calculate sectors needed for this write
+            let write_end = first_sector_offset + remaining_data.len() as u64;
+            let sector_count = ((write_end + 511) / 512).min(self.queue().max_request_pages());
+
+            match sector_count {
+                count if count <= 8 => {
+                    let _page = Page::alloc();
+                    page = Some(_page);
+                    pages = core::slice::from_ref(page.as_ref().unwrap());
+                }
+                count if count <= 16 => {
+                    let _pages = Page::alloc_order(1);
+                    page = Some(_pages);
+                    pages = core::slice::from_ref(page.as_ref().unwrap());
+                }
+                count => {
+                    let npages = (count + 15) / 16;
+                    let mut _page_vec = Vec::with_capacity(npages as usize);
+                    for _ in 0..npages {
+                        _page_vec.push(Page::alloc_order(1));
+                    }
+                    page_vec = Some(_page_vec);
+                    pages = page_vec.as_ref().unwrap().as_slice();
+                }
+            }
+
+            if first_sector_offset != 0 || remaining_data.len() < (sector_count * 512) as usize {
+                let read_req = BlockDeviceRequest::Read {
+                    sector: sector_start,
+                    count: sector_count,
+                    buffer: pages,
+                };
+                self.commit_request(read_req)?;
+            }
+
+            let mut data_offset = 0;
+            let mut page_offset = first_sector_offset as usize;
+
+            for page in pages.iter() {
+                // SAFETY: We own the page and can modify it
+                let page_data = unsafe {
+                    let memblk = page.as_memblk();
+                    core::slice::from_raw_parts_mut(memblk.addr().get() as *mut u8, memblk.len())
+                };
+
+                let copy_len =
+                    (remaining_data.len() - data_offset).min(page_data.len() - page_offset);
+
+                if copy_len == 0 {
+                    break;
+                }
+
+                page_data[page_offset..page_offset + copy_len]
+                    .copy_from_slice(&remaining_data[data_offset..data_offset + copy_len]);
+
+                data_offset += copy_len;
+                page_offset = 0; // Only first page has offset
+
+                if data_offset >= remaining_data.len() {
+                    break;
+                }
+            }
+
+            let write_req = BlockDeviceRequest::Write {
+                sector: sector_start,
+                count: sector_count,
+                buffer: pages,
+            };
+            self.commit_request(write_req)?;
+
+            let bytes_written = data_offset;
+            nwritten += bytes_written;
+            remaining_data = &remaining_data[bytes_written..];
+            sector_start += sector_count;
+            first_sector_offset = 0;
+        }
+
+        Ok(nwritten)
+    }
 }
 
 pub enum BlockDeviceRequest<'lt> {

+ 1 - 1
src/kernel/mem.rs

@@ -12,5 +12,5 @@ pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
-pub use page_cache::{CachePage, PageCache, PageCacheBackend};
+pub use page_cache::{CachePage, CachePageStream, PageCache, PageCacheBackend};
 pub use paging::{Page, PageBuffer};

+ 58 - 8
src/kernel/mem/page_cache.rs

@@ -127,27 +127,32 @@ impl PageCache {
 
     pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult<usize> {
         let mut pages = self.pages.lock().await;
+        let size = self.backend.upgrade().unwrap().size();
 
         loop {
+            if offset >= size {
+                break;
+            }
             let page_id = offset >> PAGE_SIZE_BITS;
             let page = pages.get(&page_id);
 
             match page {
                 Some(page) => {
                     let inner_offset = offset % PAGE_SIZE;
+                    let available_in_file = size.saturating_sub(offset);
 
                     // TODO: still cause unnecessary IO if valid_size < PAGESIZE
                     //       and fill result is Done
-                    if page.valid_size() == 0
-                        || buffer
-                            .fill(&page.valid_data()[inner_offset..])?
-                            .should_stop()
+                    let page_data = &page.valid_data()[inner_offset..];
+                    let read_size = page_data.len().min(available_in_file);
+
+                    if read_size == 0
+                        || buffer.fill(&page_data[..read_size])?.should_stop()
                         || buffer.available() == 0
                     {
                         break;
                     }
-
-                    offset += PAGE_SIZE - inner_offset;
+                    offset += read_size;
                 }
                 None => {
                     let mut new_page = CachePage::new();
@@ -219,7 +224,7 @@ impl PageCache {
                 self.backend
                     .upgrade()
                     .unwrap()
-                    .write_page(page, page_id << PAGE_SIZE_BITS)?;
+                    .write_page(&mut CachePageStream::new(*page), page_id << PAGE_SIZE_BITS)?;
                 page.clear_dirty();
             }
         }
@@ -295,6 +300,51 @@ impl PageCache {
     }
 }
 
+pub struct CachePageStream { // read cursor over the valid bytes of one `CachePage`
+    page: CachePage, // page whose contents are streamed out
+    cur: usize, // offset of the next unread byte; `new` starts it at 0
+}
+
+impl CachePageStream {
+    pub fn new(page: CachePage) -> Self { // wraps `page` with the cursor at offset 0
+        Self { page, cur: 0 }
+    }
+
+    pub fn remaining(&self) -> usize { // unread bytes between the cursor and `valid_size()`
+        self.page.valid_size().saturating_sub(self.cur) // saturates to 0 if the cursor is past the valid size
+    }
+
+    pub fn is_drained(&self) -> bool { // true once the cursor has reached `valid_size()`
+        self.cur >= self.page.valid_size()
+    }
+}
+
+impl Stream for CachePageStream {
+    fn poll_data<'a>(&mut self, buf: &'a mut [u8]) -> KResult<Option<&'a mut [u8]>> { // copies unread bytes into `buf`; Ok(None) once drained
+        if self.cur >= self.page.valid_size() {
+            return Ok(None); // nothing left to read
+        }
+
+        let page_data = &self.page.all()[self.cur..self.page.valid_size()]; // unread part of the valid region
+        let to_read = buf.len().min(page_data.len()); // bounded by the caller's buffer and by what is left
+
+        buf[..to_read].copy_from_slice(&page_data[..to_read]);
+        self.cur += to_read; // advance past the bytes just copied
+
+        Ok(Some(&mut buf[..to_read])) // only the filled prefix of `buf`; empty when `buf` is empty
+    }
+
+    fn ignore(&mut self, len: usize) -> KResult<Option<usize>> { // skips up to `len` unread bytes without copying; Ok(None) once drained
+        if self.cur >= self.page.valid_size() {
+            return Ok(None); // nothing left to skip
+        }
+
+        let to_ignore = len.min(self.page.valid_size() - self.cur); // never skip past the valid region
+        self.cur += to_ignore;
+        Ok(Some(to_ignore)) // number of bytes actually skipped
+    }
+}
+
 // with this trait, "page cache" and "block cache" are unified,
 // for fs, offset is file offset (floor align to PAGE_SIZE)
 // for blkdev, offset is block idx (floor align to PAGE_SIZE / BLK_SIZE)
@@ -302,7 +352,7 @@ impl PageCache {
 pub trait PageCacheBackend {
     fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
 
-    fn write_page(&self, page: &CachePage, offset: usize) -> KResult<usize>;
+    fn write_page(&self, page: &mut CachePageStream, offset: usize) -> KResult<usize>; // now receives a stream over the page rather than the page itself
 
     fn size(&self) -> usize;
 }

+ 1 - 1
src/kernel/vfs/inode.rs

@@ -141,7 +141,7 @@ pub trait Inode: Send + Sync + InodeInner + Any {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
-    fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+    fn write_direct(&self, stream: &mut dyn Stream, offset: usize) -> KResult<usize> {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }