Эх сурвалжийг харах

Merge branch 'master' into ext4-replace

Heinz 6 сар өмнө
parent
commit
082c5c52b4
87 өөрчлөгдсөн 4904 нэмэгдсэн , 539 устгасан
  1. 66 0
      .github/workflows/test-build.yaml
  2. 11 0
      Cargo.lock
  3. 11 7
      Cargo.toml
  4. 44 6
      Makefile.src
  5. 12 6
      configure
  6. 4 0
      crates/eonix_hal/Cargo.toml
  7. 15 0
      crates/eonix_hal/build.rs
  8. 293 0
      crates/eonix_hal/src/arch/loongarch64/bootstrap.rs
  9. 110 0
      crates/eonix_hal/src/arch/loongarch64/context.rs
  10. 72 0
      crates/eonix_hal/src/arch/loongarch64/cpu.rs
  11. 32 0
      crates/eonix_hal/src/arch/loongarch64/fdt.rs
  12. 58 0
      crates/eonix_hal/src/arch/loongarch64/fence.rs
  13. 141 0
      crates/eonix_hal/src/arch/loongarch64/fpu.rs
  14. 96 0
      crates/eonix_hal/src/arch/loongarch64/link.x
  15. 23 0
      crates/eonix_hal/src/arch/loongarch64/memory.x
  16. 316 0
      crates/eonix_hal/src/arch/loongarch64/mm.rs
  17. 8 0
      crates/eonix_hal/src/arch/loongarch64/mod.rs
  18. 348 0
      crates/eonix_hal/src/arch/loongarch64/trap/mod.rs
  19. 297 0
      crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs
  20. 3 0
      crates/eonix_hal/src/arch/mod.rs
  21. 7 2
      crates/eonix_hal/src/arch/riscv64/bootstrap.rs
  22. 3 1
      crates/eonix_hal/src/arch/riscv64/cpu.rs
  23. 1 1
      crates/eonix_hal/src/lib.rs
  24. 1 1
      crates/eonix_hal/src/link.x.in
  25. 1 1
      crates/eonix_mm/src/page_table/pte.rs
  26. 23 0
      crates/eonix_percpu/eonix_percpu_macros/src/lib.rs
  27. 51 0
      crates/eonix_percpu/eonix_percpu_macros/src/loongarch64.rs
  28. 6 0
      crates/eonix_percpu/src/lib.rs
  29. 1 0
      crates/posix_types/src/lib.rs
  30. 5 0
      crates/posix_types/src/poll.rs
  31. 8 0
      crates/posix_types/src/stat.rs
  32. 3 0
      crates/posix_types/src/syscall_no.rs
  33. 319 0
      crates/posix_types/src/syscall_no/loongarch64.rs
  34. 3 3
      crates/posix_types/src/syscall_no/riscv64.rs
  35. 1 1
      crates/slab_allocator/src/slab_cache.rs
  36. 7 1
      script/build-img.sh
  37. 13 0
      script/test.sh
  38. 1 1
      src/driver.rs
  39. 13 1
      src/driver/ahci/mod.rs
  40. 10 6
      src/driver/e1000e.rs
  41. 17 1
      src/driver/serial.rs
  42. 16 8
      src/driver/serial/io.rs
  43. 10 150
      src/driver/virtio.rs
  44. 154 0
      src/driver/virtio/loongarch64.rs
  45. 96 0
      src/driver/virtio/riscv64.rs
  46. 70 4
      src/driver/virtio/virtio_blk.rs
  47. 41 5
      src/fs/ext4.rs
  48. 66 6
      src/fs/fat32.rs
  49. 1 0
      src/fs/mod.rs
  50. 146 0
      src/fs/shm.rs
  51. 54 46
      src/fs/tmpfs.rs
  52. 1 0
      src/io.rs
  53. 2 0
      src/kernel/constants.rs
  54. 2 0
      src/kernel/mem.rs
  55. 1 25
      src/kernel/mem/access.rs
  56. 7 8
      src/kernel/mem/allocator.rs
  57. 81 31
      src/kernel/mem/mm_area.rs
  58. 56 14
      src/kernel/mem/mm_list.rs
  59. 14 3
      src/kernel/mem/mm_list/mapping.rs
  60. 13 3
      src/kernel/mem/mm_list/page_fault.rs
  61. 54 9
      src/kernel/mem/page_alloc/raw_page.rs
  62. 324 0
      src/kernel/mem/page_cache.rs
  63. 2 2
      src/kernel/pcie.rs
  64. 111 45
      src/kernel/pcie/device.rs
  65. 5 3
      src/kernel/pcie/driver.rs
  66. 321 19
      src/kernel/pcie/header.rs
  67. 59 7
      src/kernel/pcie/init.rs
  68. 12 0
      src/kernel/syscall.rs
  69. 75 6
      src/kernel/syscall/file_rw.rs
  70. 141 46
      src/kernel/syscall/mm.rs
  71. 154 8
      src/kernel/syscall/procops.rs
  72. 42 12
      src/kernel/syscall/sysinfo.rs
  73. 1 1
      src/kernel/task.rs
  74. 4 1
      src/kernel/task/loader/elf.rs
  75. 4 0
      src/kernel/task/process.rs
  76. 10 3
      src/kernel/task/signal.rs
  77. 18 3
      src/kernel/task/signal/signal_action.rs
  78. 3 3
      src/kernel/task/thread.rs
  79. 1 1
      src/kernel/timer.rs
  80. 87 0
      src/kernel/user/dataflow.rs
  81. 39 13
      src/kernel/vfs/file.rs
  82. 18 4
      src/kernel/vfs/inode.rs
  83. 3 1
      src/kernel_init.rs
  84. 28 4
      src/lib.rs
  85. BIN
      user-programs/busybox.la64
  86. 97 0
      user-programs/init_script_loongarch64.sh
  87. 6 6
      user-programs/init_script_riscv64.sh

+ 66 - 0
.github/workflows/test-build.yaml

@@ -0,0 +1,66 @@
+name: Test Build
+
+on:
+  push:
+    branches-ignore:
+      - comp-and-judge
+  pull_request:
+    branches-ignore:
+      - comp-and-judge
+  workflow_dispatch:
+
+jobs:
+  build-and-run-test:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [riscv64, loongarch64]
+        include:
+          - arch: riscv64
+            target: riscv64gc-unknown-none-elf
+          - arch: loongarch64
+            target: loongarch64-unknown-none-softfloat
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup rust nightly
+        run: rustup component add rust-src llvm-tools
+
+      - name: Setup QEMU
+        run: |
+          sudo apt-get install -y qemu-system-${{ matrix.arch }} qemu-kvm
+
+      - name: Configure
+        run: ./configure
+
+      - name: Run build for ${{ matrix.arch }} targets
+        run: |
+          make build ARCH=${{ matrix.arch }} MODE=release
+          zstd -k build/boot-${{ matrix.arch }}.img
+
+      - name: Upload build artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: eonix-kernel-and-image-${{ matrix.arch }}
+          path: |
+            build/${{ matrix.target }}/release/eonix_kernel
+            build/boot-${{ matrix.arch }}.img.zst
+
+      - name: Test run for ${{ matrix.arch }}
+        run: |
+          echo "Fixing permissions for /dev/kvm..."
+          sudo adduser $USER kvm
+          sh script/test.sh
+        env:
+          ARCH: ${{ matrix.arch }}
+          QEMU_ACCEL: ''
+        timeout-minutes: 2
+        continue-on-error: true
+
+      - name: Upload run log
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-${{ matrix.arch }}.log
+          path: build/test-*.log

+ 11 - 0
Cargo.lock

@@ -115,6 +115,7 @@ dependencies = [
  "eonix_sync_base",
  "fdt",
  "intrusive_list",
+ "loongArch64",
  "riscv",
  "sbi",
 ]
@@ -290,6 +291,16 @@ version = "0.4.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
 
+[[package]]
+name = "loongArch64"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c9f0d275c70310e2a9d2fc23250c5ac826a73fa828a5f256401f85c5c554283"
+dependencies = [
+ "bit_field",
+ "bitflags",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.9.1"

+ 11 - 7
Cargo.toml

@@ -32,34 +32,38 @@ align_ext = "0.1.0"
 xmas-elf = "0.10.0"
 another_ext4 = { git = "https://github.com/SMS-Derfflinger/another_ext4", branch = "main" }
 
-[target.'cfg(target_arch = "riscv64")'.dependencies]
+[target.'cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))'.dependencies]
 virtio-drivers = { version = "0.11.0" }
 
 [features]
 default = []
+trace_pci = []
 trace_syscall = []
 trace_scheduler = []
-log_trace = ["trace_syscall", "trace_scheduler"]
+log_trace = ["trace_pci", "trace_syscall", "trace_scheduler"]
 log_debug = []
 smp = []
 
+[profile.release]
+debug = true
+
 [profile.dev]
 panic = "abort"
 
 [profile.dev.package.eonix_preempt]
-opt-level = 2
+opt-level = "s"
 
 [profile.dev.package.eonix_runtime]
-opt-level = 0
+opt-level = "s"
 
 [profile.dev.package.eonix_sync]
-opt-level = 2
+opt-level = "s"
 
 [profile.dev.package.intrusive_list]
-opt-level = 2
+opt-level = "s"
 
 [profile.dev.package.eonix_hal]
-opt-level = 0
+opt-level = "s"
 
 [profile.dev.package."*"]
 opt-level = "s"

+ 44 - 6
Makefile.src

@@ -21,15 +21,19 @@ KERNEL_SOURCES := $(shell find src macros crates -name '*.rs' -type f)
 KERNEL_CARGO_MANIFESTS += $(shell find src macros crates -name Cargo.toml -type f)
 KERNEL_DEPS := $(KERNEL_SOURCES) $(KERNEL_CARGO_MANIFESTS)
 
-QEMU_ARGS ?= -no-reboot -no-shutdown
+QEMU_ARGS ?= -no-reboot
 CARGO_FLAGS := --profile $(PROFILE) --features $(FEATURES)$(if $(SMP),$(COMMA)smp,)
 
 ifeq ($(HOST),darwin)
 QEMU_ACCEL ?= -accel tcg
 else ifeq ($(HOST),linux)
+
+ifeq ($(shell ls /dev/kvm),/dev/kvm)
 QEMU_ACCEL ?= -accel kvm
 endif
 
+endif
+
 QEMU_ARGS += $(QEMU_ACCEL)
 
 ifneq ($(DEBUG_TRAPS),)
@@ -64,6 +68,30 @@ CARGO_FLAGS += --target riscv64gc-unknown-none-elf
 .PHONY: build
 build: $(BINARY_DIR)/eonix_kernel build/boot-riscv64.img
 
+else ifeq ($(ARCH),loongarch64)
+
+BINARY_DIR_BASE := build/loongarch64-unknown-none-softfloat
+BINARY_DIR := $(BINARY_DIR_BASE)/$(MODE)
+
+QEMU_ARGS += \
+	-machine virt -kernel $(BINARY_DIR)/eonix_kernel -m 1G \
+	-device virtio-blk-pci,drive=disk0 \
+	-device virtio-net-pci,netdev=mynet0 \
+	-drive id=disk0,file=build/boot-loongarch64.img,format=raw,if=none \
+	-netdev user,id=mynet0,hostfwd=tcp::5555-:5555,hostfwd=udp::5555-:5555 \
+	-rtc base=utc
+
+ifneq ($(IMG),)
+QEMU_ARGS += \
+	-drive id=disk1,file=$(IMG),format=raw,if=none \
+	-device virtio-blk-pci,drive=disk1
+endif
+
+CARGO_FLAGS += --target loongarch64-unknown-none-softfloat
+
+.PHONY: build
+build: $(BINARY_DIR)/eonix_kernel build/boot-loongarch64.img
+
 else ifeq ($(ARCH),x86_64)
 
 BINARY_DIR_BASE := build/x86_64-unknown-none
@@ -98,6 +126,10 @@ run: build build/kernel.sym
 srun: build build/kernel.sym
 	$(QEMU) $(QEMU_ARGS) -display none -S -s -serial mon:stdio
 
+.PHONY: test-run
+test-run: build
+	$(QEMU) $(QEMU_ARGS) -display none -serial stdio
+
 .PHONY: clean
 clean:
 	-rm -rf build
@@ -128,22 +160,22 @@ tmux-debug:
 	tmux kill-session -t gbos-debug
 
 $(BINARY_DIR)/eonix_kernel: $(KERNEL_DEPS)
-	cargo build $(CARGO_FLAGS)
+	CARGO_TARGET_DIR=build cargo build $(CARGO_FLAGS)
 
 build/kernel.sym: $(BINARY_DIR)/eonix_kernel
-	cargo objcopy -q $(CARGO_FLAGS) -- --only-keep-debug build/kernel.sym
+	CARGO_TARGET_DIR=build cargo objcopy -q $(CARGO_FLAGS) -- --only-keep-debug build/kernel.sym
 
 build/fs-%.img: user-programs/init_script_%.sh script/build-img.sh $(USER_PROGRAMS)
 	ARCH=$* OUTPUT=$@ sh script/build-img.sh
 
 build/mbr.bin: $(BINARY_DIR)/eonix_kernel
-	cargo objcopy -q $(CARGO_FLAGS) -- -O binary -j .mbr build/mbr.bin
+	CARGO_TARGET_DIR=build cargo objcopy -q $(CARGO_FLAGS) -- -O binary -j .mbr build/mbr.bin
 
 build/stage1.bin: $(BINARY_DIR)/eonix_kernel
-	cargo objcopy -q $(CARGO_FLAGS) -- -O binary -j .stage1 build/stage1.bin
+	CARGO_TARGET_DIR=build cargo objcopy -q $(CARGO_FLAGS) -- -O binary -j .stage1 build/stage1.bin
 
 build/kernel.bin: $(BINARY_DIR)/eonix_kernel
-	cargo objcopy -q $(CARGO_FLAGS) -- -O binary --strip-debug \
+	CARGO_TARGET_DIR=build cargo objcopy -q $(CARGO_FLAGS) -- -O binary --strip-debug \
 		-R .mbr -R .stage1 build/kernel.bin
 
 build/boot-x86_64.img: build/fs-x86_64.img build/mbr.bin build/stage1.bin build/kernel.bin
@@ -161,4 +193,10 @@ build/boot-riscv64.img: build/fs-riscv64.img
 	sh -c 'echo n; echo; echo; echo 8192; echo; echo a; echo w' \
 		| $(FDISK) $@ 2> /dev/null > /dev/null
 
+build/boot-loongarch64.img: build/fs-loongarch64.img
+	dd if=$< of=$@ bs=$(shell expr 4 \* 1024 \* 1024) \
+		seek=1 conv=notrunc 2> /dev/null
+	sh -c 'echo n; echo; echo; echo 8192; echo; echo a; echo w' \
+		| $(FDISK) $@ 2> /dev/null > /dev/null
+
 .DEFAULT_GOAL := build

+ 12 - 6
configure

@@ -1,6 +1,12 @@
 #!/bin/sh
 DEFAULT_ARCH="x86_64"
 
+if [ "$OUT" = "" ]; then
+    OUT="Makefile"
+fi
+
+printf "Configuring for %s...\n" "$OUT"
+
 event() {
     printf "$1... "
 }
@@ -119,10 +125,10 @@ else
     echo "$IMG"
 fi
 
-cp Makefile.src Makefile
-sed -i '' -e "s|##DEFAULT_ARCH##|$DEFAULT_ARCH|" Makefile > /dev/null 2>&1
-sed -i '' -e "s|##GDB##|$GDB|" Makefile > /dev/null 2>&1
-sed -i '' -e "s|##QEMU##|$QEMU|" Makefile > /dev/null 2>&1
-sed -i '' -e "s|##FDISK##|$FDISK|" Makefile > /dev/null 2>&1
-sed -i '' -e "s|##IMAGE##|$IMG|" Makefile > /dev/null 2>&1
+cp Makefile.src "$OUT"
+sed -i '' -e "s|##DEFAULT_ARCH##|$DEFAULT_ARCH|" "$OUT" > /dev/null 2>&1
+sed -i '' -e "s|##GDB##|$GDB|" "$OUT" > /dev/null 2>&1
+sed -i '' -e "s|##QEMU##|$QEMU|" "$OUT" > /dev/null 2>&1
+sed -i '' -e "s|##FDISK##|$FDISK|" "$OUT" > /dev/null 2>&1
+sed -i '' -e "s|##IMAGE##|$IMG|" "$OUT" > /dev/null 2>&1
 exit 0

+ 4 - 0
crates/eonix_hal/Cargo.toml

@@ -23,3 +23,7 @@ sbi = "0.3.0"
 riscv = { version = "0.13.0", features = ["s-mode"] }
 fdt = "0.1"
 bitflags = "2.6.0"
+
+[target.'cfg(target_arch = "loongarch64")'.dependencies]
+loongArch64 = "0.2.5"
+fdt = "0.1"

+ 15 - 0
crates/eonix_hal/build.rs

@@ -19,6 +19,18 @@ fn process_ldscript_riscv64(script: &mut String) -> Result<(), Box<dyn std::erro
     Ok(())
 }
 
+fn process_ldscript_loongarch64(script: &mut String) -> Result<(), Box<dyn std::error::Error>> {
+    println!("cargo:extra-link-args= --no-check-sections");
+
+    let memory = read_dependent_script("src/arch/loongarch64/memory.x")?;
+    let link = read_dependent_script("src/arch/loongarch64/link.x")?;
+
+    *script = memory + script;
+    script.push_str(&link);
+
+    Ok(())
+}
+
 fn process_ldscript_x86(script: &mut String) -> Result<(), Box<dyn std::error::Error>> {
     // Otherwise `bootstrap.rs` might be ignored and not linked in.
     println!("cargo:extra-link-args=--undefined=move_mbr --no-check-sections");
@@ -43,6 +55,9 @@ fn process_ldscript_arch(
         "riscv64" => {
             process_ldscript_riscv64(script)?;
         }
+        "loongarch64" => {
+            process_ldscript_loongarch64(script)?;
+        }
         _ => panic!("Unsupported architecture: {}", arch),
     }
 

+ 293 - 0
crates/eonix_hal/src/arch/loongarch64/bootstrap.rs

@@ -0,0 +1,293 @@
+use super::cpu::CPUID;
+use super::cpu::CPU_COUNT;
+use crate::{
+    arch::{
+        cpu::CPU,
+        mm::{ArchPhysAccess, PageAttribute64, GLOBAL_PAGE_TABLE},
+        trap::CSR_KERNEL_TP,
+    },
+    bootstrap::BootStrapData,
+    mm::{
+        flush_tlb_all, ArchMemory, ArchPagingMode, BasicPageAlloc, BasicPageAllocRef,
+        ScopedAllocator,
+    },
+};
+use core::arch::naked_asm;
+use core::{
+    alloc::Allocator,
+    arch::asm,
+    cell::RefCell,
+    sync::atomic::{AtomicBool, AtomicUsize, Ordering},
+};
+use eonix_hal_traits::mm::Memory;
+use eonix_mm::{
+    address::{Addr as _, PAddr, PRange, PhysAccess, VAddr, VRange},
+    page_table::{PageAttribute, PagingMode, PTE as _},
+    paging::{Page, PageAccess, PageAlloc, PAGE_SIZE, PFN},
+};
+use eonix_percpu::PercpuArea;
+use loongArch64::register::ecfg;
+use loongArch64::register::ecfg::LineBasedInterrupt;
+use loongArch64::register::tcfg;
+use loongArch64::register::{euen, pgdl};
+
+#[unsafe(link_section = ".bootstrap.stack")]
+static BOOT_STACK: [u8; 4096 * 16] = [0; 4096 * 16];
+static BOOT_STACK_START: &'static [u8; 4096 * 16] = &BOOT_STACK;
+
+#[repr(C, align(4096))]
+struct PageTable([u64; 512]);
+
+/// map 0x8000_0000 to 0x8000_0000 and 0xffff_ffff_8000_0000
+#[unsafe(link_section = ".bootstrap.page_table.1")]
+static BOOT_PAGE_TABLE: PageTable = {
+    let mut arr = [0; 512];
+    arr[0] = 0 | 0x11d3; // G | W | P | H | Cached | D | V
+    arr[510] = 0 | 0x11d3; // G | W | P | H | Cached | D | V
+    arr[511] = 0x8000_2000 | (1 << 60); // PT1, PT
+
+    PageTable(arr)
+};
+
+#[unsafe(link_section = ".bootstrap.page_table.2")]
+#[used]
+static PT1: PageTable = {
+    let mut arr = [0; 512];
+    arr[510] = 0x8000_0000 | 0x11d3; // G | W | P | H | Cached | D | V
+
+    PageTable(arr)
+};
+
+/// bootstrap in rust
+#[unsafe(naked)]
+#[unsafe(no_mangle)]
+#[unsafe(link_section = ".bootstrap.entry")]
+unsafe extern "C" fn _start() -> ! {
+    naked_asm!(
+        "
+            li.d      $t0, 0xc
+            csrwr     $t0, {CSR_STLB_PAGE_SIZE}
+
+            li.d      $t0, {PWCL}
+            csrwr     $t0, {CSR_PWCL}
+
+            li.d      $t0, {PWCH}
+            csrwr     $t0, {CSR_PWCH}
+
+            la.global $t0, {tlb_refill_entry}
+            csrwr     $t0, {CSR_TLB_REFILL_ENTRY}
+
+            la.global $t0, {page_table}
+            move      $t1, $t0
+            csrwr     $t0, {CSR_PGDL}
+            csrwr     $t1, {CSR_PGDH}
+
+            dbar      0x0
+            invtlb    0x0, $zero, $zero
+
+            csrrd     $t0, {CSR_CRMD}
+            li.d      $t1, ~0x18
+            and       $t0, $t0, $t1
+            ori       $t0, $t0,  0x10
+            csrwr     $t0, {CSR_CRMD}
+
+            la.global $sp, {boot_stack}
+            li.d      $t0, 0xffffff0000000000
+            or        $sp, $sp, $t0
+            li.d      $t0, {BOOT_STACK_SIZE}
+            add.d     $sp, $sp, $t0
+
+            csrrd     $a0, {CSR_CPUID}
+            move      $ra, $zero
+
+            la.global $t0, {riscv64_start}
+            jirl      $zero, $t0, 0
+        ",
+        boot_stack = sym BOOT_STACK,
+        BOOT_STACK_SIZE = const size_of_val(&BOOT_STACK),
+        CSR_CRMD = const 0x00,
+        CSR_PGDL = const 0x19,
+        CSR_PGDH = const 0x1a,
+        CSR_PWCL = const 0x1c,
+        CSR_PWCH = const 0x1d,
+        CSR_STLB_PAGE_SIZE = const 0x1e,
+        CSR_CPUID = const 0x20,
+        CSR_TLB_REFILL_ENTRY = const 0x88,
+        PWCL = const (12 << 0) | (9 << 5) | (21 << 10) | (9 << 15) | (30 << 20) | (9 << 25) | (0 << 30),
+        PWCH = const (39 << 0) | (9 << 6),
+        tlb_refill_entry = sym tlb_refill_entry,
+        page_table = sym BOOT_PAGE_TABLE,
+        riscv64_start = sym riscv64_start,
+    )
+}
+
+#[unsafe(naked)]
+#[unsafe(link_section = ".bootstrap.tlb_fill_entry")]
+unsafe extern "C" fn tlb_refill_entry() {
+    naked_asm!(
+        "csrwr   $t0, {CSR_TLBRSAVE}",
+        "csrrd   $t0, {CSR_PGD}",
+        "lddir   $t0, $t0, 3",
+        "lddir   $t0, $t0, 2",
+        "lddir   $t0, $t0, 1",
+        "ldpte   $t0, 0",
+        "ldpte   $t0, 1",
+        "tlbfill",
+        "csrrd   $t0, {CSR_TLBRSAVE}",
+        "ertn",
+        CSR_TLBRSAVE = const 0x8b,
+        CSR_PGD = const 0x1b,
+    )
+}
+
+/// TODO:
+/// 启动所有的cpu
+pub unsafe extern "C" fn riscv64_start(hart_id: usize) -> ! {
+    pgdl::set_base(0xffff_ffff_ffff_0000);
+    flush_tlb_all();
+
+    let real_allocator = RefCell::new(BasicPageAlloc::new());
+    let alloc = BasicPageAllocRef::new(&real_allocator);
+
+    for range in ArchMemory::free_ram() {
+        real_allocator.borrow_mut().add_range(range);
+    }
+
+    setup_kernel_page_table(&alloc);
+
+    setup_cpu(&alloc, hart_id);
+
+    // TODO: set up interrupt, smp
+    ScopedAllocator::new(&mut [0; 1024])
+        .with_alloc(|mem_alloc| bootstrap_smp(mem_alloc, &real_allocator));
+
+    unsafe extern "Rust" {
+        fn _eonix_hal_main(_: BootStrapData) -> !;
+    }
+
+    let start = unsafe {
+        ((&BOOT_STACK_START) as *const &'static [u8; 4096 * 16]).read_volatile() as *const _
+            as usize
+    };
+    let bootstrap_data = BootStrapData {
+        early_stack: PRange::new(
+            PAddr::from(start),
+            PAddr::from(start + size_of_val(&BOOT_STACK)),
+        ),
+        allocator: Some(real_allocator),
+    };
+
+    unsafe {
+        _eonix_hal_main(bootstrap_data);
+    }
+}
+
+unsafe extern "C" {
+    fn BSS_LENGTH();
+    fn KIMAGE_PAGES();
+}
+
+fn setup_kernel_page_table(alloc: impl PageAlloc) {
+    let global_page_table = &GLOBAL_PAGE_TABLE;
+
+    let attr = PageAttribute::WRITE
+        | PageAttribute::READ
+        | PageAttribute::EXECUTE
+        | PageAttribute::GLOBAL
+        | PageAttribute::PRESENT
+        | PageAttribute::ACCESSED
+        | PageAttribute::DIRTY;
+
+    const KERNEL_BSS_START: VAddr = VAddr::from(0xffffffff40000000);
+
+    // Map kernel BSS
+    for pte in global_page_table.iter_kernel_in(
+        VRange::from(KERNEL_BSS_START).grow(BSS_LENGTH as usize),
+        ArchPagingMode::LEVELS,
+        &alloc,
+    ) {
+        let page = Page::alloc_in(&alloc);
+
+        let attr = {
+            let mut attr = attr.clone();
+            attr.remove(PageAttribute::EXECUTE);
+            attr
+        };
+        pte.set(page.into_raw(), attr.into());
+    }
+
+    flush_tlb_all();
+
+    unsafe {
+        core::ptr::write_bytes(KERNEL_BSS_START.addr() as *mut (), 0, BSS_LENGTH as usize);
+    }
+}
+
+/// set up tp register to percpu
+fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) {
+    // enable FPU
+    euen::set_fpe(true);
+    euen::set_sxe(true);
+
+    CPU_COUNT.fetch_add(1, Ordering::Relaxed);
+
+    let mut percpu_area = PercpuArea::new(|layout| {
+        let page_count = layout.size().div_ceil(PAGE_SIZE);
+        let page = Page::alloc_at_least_in(page_count, alloc);
+
+        let ptr = ArchPhysAccess::get_ptr_for_page(&page).cast();
+        page.into_raw();
+
+        ptr
+    });
+
+    // set tp(x4) register
+    percpu_area.setup(|pointer| {
+        let percpu_base_addr = pointer.addr().get();
+        unsafe {
+            asm!(
+                "move $tp, {0}",
+                in(reg) percpu_base_addr,
+                options(nostack, preserves_flags)
+            );
+        }
+    });
+
+    CPUID.set(hart_id);
+
+    let mut cpu = CPU::local();
+    unsafe {
+        cpu.as_mut().init();
+    }
+
+    percpu_area.register(cpu.cpuid());
+
+    unsafe {
+        asm!(
+            "csrwr {tp}, {CSR_KERNEL_TP}",
+            tp = inout(reg) PercpuArea::get_for(cpu.cpuid()).unwrap().as_ptr() => _,
+            CSR_KERNEL_TP = const CSR_KERNEL_TP,
+        )
+    }
+
+    let timer_frequency = loongArch64::time::get_timer_freq();
+
+    // 1ms periodic timer.
+    tcfg::set_init_val(timer_frequency / 1_000);
+    tcfg::set_periodic(true);
+    tcfg::set_en(true);
+
+    ecfg::set_lie(LineBasedInterrupt::all());
+}
+
+/// TODO
+fn bootstrap_smp(alloc: impl Allocator, page_alloc: &RefCell<BasicPageAlloc>) {}
+
+pub fn shutdown() -> ! {
+    let ged_addr = PAddr::from(0x100E001C);
+    unsafe {
+        let ged_ptr = ArchPhysAccess::as_ptr::<u8>(ged_addr);
+        ged_ptr.write_volatile(0x34);
+        loop {}
+    }
+}

+ 110 - 0
crates/eonix_hal/src/arch/loongarch64/context.rs

@@ -0,0 +1,110 @@
+use core::arch::naked_asm;
+use eonix_hal_traits::context::RawTaskContext;
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct TaskContext {
+    sx: [u64; 9],
+    sp: u64,
+    ra: u64,
+    fp: u64,
+    crmd: usize,
+}
+
+impl RawTaskContext for TaskContext {
+    fn new() -> Self {
+        Self::new()
+    }
+
+    fn set_program_counter(&mut self, pc: usize) {
+        self.ra = pc as u64;
+    }
+
+    fn set_stack_pointer(&mut self, sp: usize) {
+        self.sp = sp as u64;
+    }
+
+    fn is_interrupt_enabled(&self) -> bool {
+        self.crmd & (1 << 2) != 0
+    }
+
+    fn set_interrupt_enabled(&mut self, is_enabled: bool) {
+        if is_enabled {
+            self.crmd = self.crmd | (1 << 2);
+        } else {
+            self.crmd = self.crmd & !(1 << 2);
+        }
+    }
+
+    fn call(&mut self, func: unsafe extern "C" fn(usize) -> !, arg: usize) {
+        self.sx[0] = func as u64;
+        self.sx[1] = arg as u64;
+
+        self.set_program_counter(Self::do_call as usize);
+    }
+
+    #[unsafe(naked)]
+    unsafe extern "C" fn switch(from: &mut Self, to: &mut Self) {
+        // Input arguments `from` and `to` will be in `a0` (x10) and `a1` (x11).
+        naked_asm!(
+            // Save current task's callee-saved registers to `from` context
+            "st.d  $s0, $a0,  0",
+            "st.d  $s1, $a0,  8",
+            "st.d  $s2, $a0, 16",
+            "st.d  $s3, $a0, 24",
+            "st.d  $s4, $a0, 32",
+            "st.d  $s5, $a0, 40",
+            "st.d  $s6, $a0, 48",
+            "st.d  $s7, $a0, 56",
+            "st.d  $s8, $a0, 64",
+            "st.d  $sp, $a0, 72",
+            "st.d  $ra, $a0, 80",
+            "st.d  $fp, $a0, 88",
+            "csrrd $t0, 0", // CRMD
+            "st.d  $t0, $a0, 96",
+            "",
+            "ld.d  $s0, $a1,  0",
+            "ld.d  $s1, $a1,  8",
+            "ld.d  $s2, $a1, 16",
+            "ld.d  $s3, $a1, 24",
+            "ld.d  $s4, $a1, 32",
+            "ld.d  $s5, $a1, 40",
+            "ld.d  $s6, $a1, 48",
+            "ld.d  $s7, $a1, 56",
+            "ld.d  $s8, $a1, 64",
+            "ld.d  $sp, $a1, 72",
+            "ld.d  $ra, $a1, 80",
+            "ld.d  $fp, $a1, 88",
+            "ld.d  $t0, $a1, 96",
+            "csrwr $t0, 0", // CRMD
+            "ret",
+        );
+    }
+}
+
+impl TaskContext {
+    pub const fn new() -> Self {
+        Self {
+            sx: [0; 9],
+            sp: 0,
+            ra: 0,
+            fp: 0,
+            crmd: 1 << 4, // PG = 1
+        }
+    }
+
+    #[unsafe(naked)]
+    /// Maximum of 5 arguments supported.
+    unsafe extern "C" fn do_call() -> ! {
+        naked_asm!(
+            "move $a0, $s1", // Args
+            "move $a1, $s2",
+            "move $a2, $s3",
+            "move $a3, $s4",
+            "move $a4, $s5",
+            "move $fp, $zero", // Set frame pointer to 0.
+            "move $ra, $zero",
+            "jirl $zero, $s0, 0",
+        );
+    }
+}

+ 72 - 0
crates/eonix_hal/src/arch/loongarch64/cpu.rs

@@ -0,0 +1,72 @@
+use super::trap::setup_trap;
+use core::sync::atomic::AtomicUsize;
+use core::{arch::asm, pin::Pin, ptr::NonNull};
+use eonix_preempt::PreemptGuard;
+use eonix_sync_base::LazyLock;
+
+pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+#[eonix_percpu::define_percpu]
+pub static CPUID: usize = 0;
+
+#[eonix_percpu::define_percpu]
+static LOCAL_CPU: LazyLock<CPU> = LazyLock::new(|| CPU::new(CPUID.get()));
+
+#[derive(Debug, Clone)]
+pub enum UserTLS {
+    Base(u64),
+}
+
+pub struct CPU {}
+
+impl UserTLS {
+    pub fn new(base: u64) -> Self {
+        Self::Base(base)
+    }
+}
+
+impl CPU {
+    fn new(cpuid: usize) -> Self {
+        Self {}
+    }
+
+    /// Load CPU specific configurations for the current Hart.
+    ///
+    /// # Safety
+    /// This function performs low-level hardware initialization and should
+    /// only be called once per Hart during its boot sequence.
+    pub unsafe fn init(mut self: Pin<&mut Self>) {
+        let me = self.as_mut().get_unchecked_mut();
+        setup_trap();
+    }
+
+    /// Boot all other hart.
+    pub unsafe fn bootstrap_cpus(&self) {
+        // todo
+    }
+
+    pub unsafe fn load_interrupt_stack(self: Pin<&mut Self>, _: u64) {}
+
+    pub fn set_tls32(self: Pin<&mut Self>, _user_tls: &UserTLS) {
+        // nothing
+    }
+
+    pub fn local() -> PreemptGuard<Pin<&'static mut Self>> {
+        unsafe {
+            // SAFETY: We pass the reference into a `PreemptGuard`, which ensures
+            //         that preemption is disabled.
+            PreemptGuard::new(Pin::new_unchecked(LOCAL_CPU.as_mut().get_mut()))
+        }
+    }
+
+    pub fn cpuid(&self) -> usize {
+        CPUID.get()
+    }
+}
+
+#[inline(always)]
+pub fn halt() {
+    unsafe {
+        loongArch64::asm::idle();
+    }
+}

+ 32 - 0
crates/eonix_hal/src/arch/loongarch64/fdt.rs

@@ -0,0 +1,32 @@
+use super::mm::ArchPhysAccess;
+use core::sync::atomic::{AtomicPtr, Ordering};
+use eonix_mm::address::{Addr, PAddr, PRange, PhysAccess};
+use eonix_sync_base::LazyLock;
+use fdt::Fdt;
+
+const FDT_PADDR: PAddr = PAddr::from_val(0x100000);
+
+pub static FDT: LazyLock<Fdt<'static>> = LazyLock::new(|| unsafe {
+    Fdt::from_ptr(ArchPhysAccess::as_ptr(FDT_PADDR).as_ptr())
+        .expect("Failed to parse DTB from static memory.")
+});
+
+pub trait FdtExt {
+    fn harts(&self) -> impl Iterator<Item = usize>;
+
+    fn hart_count(&self) -> usize {
+        self.harts().count()
+    }
+
+    fn present_ram(&self) -> impl Iterator<Item = PRange>;
+}
+
+impl FdtExt for Fdt<'_> {
+    fn harts(&self) -> impl Iterator<Item = usize> {
+        self.cpus().map(|cpu| cpu.ids().all()).flatten()
+    }
+
+    fn present_ram(&self) -> impl Iterator<Item = PRange> {
+        core::iter::empty()
+    }
+}

+ 58 - 0
crates/eonix_hal/src/arch/loongarch64/fence.rs

@@ -0,0 +1,58 @@
+use core::{
+    arch::asm,
+    sync::atomic::{compiler_fence, Ordering},
+};
+
+#[doc(hidden)]
+/// Issues a full memory barrier.
+///
+/// Ensures all memory operations issued before the fence are globally
+/// visible before any memory operations issued after the fence.
+pub fn memory_barrier() {
+    unsafe {
+        // A full memory barrier to prevent the compiler from reordering.
+        compiler_fence(Ordering::SeqCst);
+
+        // rw for both predecessor and successor: read-write, read-write
+        asm!("dbar 0x0", options(nostack, nomem, preserves_flags));
+
+        // A full memory barrier to prevent the compiler from reordering.
+        compiler_fence(Ordering::SeqCst);
+    }
+}
+
+#[doc(hidden)]
+/// Issues a read memory barrier.
+///
+/// Ensures all memory loads issued before the fence are globally
+/// visible before any memory loads issued after the fence.
+pub fn read_memory_barrier() {
+    unsafe {
+        // A full memory barrier to prevent the compiler from reordering.
+        compiler_fence(Ordering::SeqCst);
+
+        // r for both predecessor and successor: read, read
+        asm!("dbar 0x05", options(nostack, nomem, preserves_flags));
+
+        // A full memory barrier to prevent the compiler from reordering.
+        compiler_fence(Ordering::SeqCst);
+    }
+}
+
+#[doc(hidden)]
+/// Issues a write memory barrier.
+///
+/// Ensures all memory stores issued before the fence are globally
+/// visible before any memory stores issued after the fence.
+pub fn write_memory_barrier() {
+    unsafe {
+        // A full memory barrier to prevent the compiler from reordering.
+        compiler_fence(Ordering::SeqCst);
+
+        // w for both predecessor and successor: write, write
+        asm!("dbar 0x0a", options(nostack, nomem, preserves_flags));
+
+        // A full memory barrier to prevent the compiler from reordering.
+        compiler_fence(Ordering::SeqCst);
+    }
+}

+ 141 - 0
crates/eonix_hal/src/arch/loongarch64/fpu.rs

@@ -0,0 +1,141 @@
+use core::arch::asm;
+use eonix_hal_traits::fpu::RawFpuState;
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy, Default)]
+pub struct FpuState {
+    pub fx: [u64; 32],
+    pub fcc: u64,
+    pub fcsr: u64,
+}
+
+impl RawFpuState for FpuState {
+    fn new() -> Self {
+        unsafe { core::mem::zeroed() }
+    }
+
+    /// Save the FPU registers ($f0-$f31, $fcc0-$fcc7, $fcsr0) into this struct.
+    fn save(&mut self) {
+        unsafe {
+            asm!(
+            "fst.d $f0,  {base},  0 * 8",
+            "fst.d $f1,  {base},  1 * 8",
+            "fst.d $f2,  {base},  2 * 8",
+            "fst.d $f3,  {base},  3 * 8",
+            "fst.d $f4,  {base},  4 * 8",
+            "fst.d $f5,  {base},  5 * 8",
+            "fst.d $f6,  {base},  6 * 8",
+            "fst.d $f7,  {base},  7 * 8",
+            "fst.d $f8,  {base},  8 * 8",
+            "fst.d $f9,  {base},  9 * 8",
+            "fst.d $f10, {base}, 10 * 8",
+            "fst.d $f11, {base}, 11 * 8",
+            "fst.d $f12, {base}, 12 * 8",
+            "fst.d $f13, {base}, 13 * 8",
+            "fst.d $f14, {base}, 14 * 8",
+            "fst.d $f15, {base}, 15 * 8",
+            "fst.d $f16, {base}, 16 * 8",
+            "fst.d $f17, {base}, 17 * 8",
+            "fst.d $f18, {base}, 18 * 8",
+            "fst.d $f19, {base}, 19 * 8",
+            "fst.d $f20, {base}, 20 * 8",
+            "fst.d $f21, {base}, 21 * 8",
+            "fst.d $f22, {base}, 22 * 8",
+            "fst.d $f23, {base}, 23 * 8",
+            "fst.d $f24, {base}, 24 * 8",
+            "fst.d $f25, {base}, 25 * 8",
+            "fst.d $f26, {base}, 26 * 8",
+            "fst.d $f27, {base}, 27 * 8",
+            "fst.d $f28, {base}, 28 * 8",
+            "fst.d $f29, {base}, 29 * 8",
+            "fst.d $f30, {base}, 30 * 8",
+            "fst.d $f31, {base}, 31 * 8",
+            "",
+            "movcf2gr    {tmp}, $fcc0",
+            "move        {fcc}, {tmp}",
+            "movcf2gr    {tmp}, $fcc1",
+            "bstrins.d   {fcc}, {tmp}, 15, 8",
+            "movcf2gr    {tmp}, $fcc2",
+            "bstrins.d   {fcc}, {tmp}, 23, 16",
+            "movcf2gr    {tmp}, $fcc3",
+            "bstrins.d   {fcc}, {tmp}, 31, 24",
+            "movcf2gr    {tmp}, $fcc4",
+            "bstrins.d   {fcc}, {tmp}, 39, 32",
+            "movcf2gr    {tmp}, $fcc5",
+            "bstrins.d   {fcc}, {tmp}, 47, 40",
+            "movcf2gr    {tmp}, $fcc6",
+            "bstrins.d   {fcc}, {tmp}, 55, 48",
+            "movcf2gr    {tmp}, $fcc7",
+            "bstrins.d   {fcc}, {tmp}, 63, 56",
+            "",
+            "movfcsr2gr  {fcsr}, $fcsr0",
+            base = in(reg) &raw mut self.fx,
+            tmp = out(reg) _,
+            fcc = out(reg) self.fcc,
+            fcsr = out(reg) self.fcsr,
+            options(nostack, preserves_flags));
+        }
+    }
+
+    fn restore(&mut self) {
+        unsafe {
+            asm!(
+            "fld.d $f0,  {base},  0 * 8",
+            "fld.d $f1,  {base},  1 * 8",
+            "fld.d $f2,  {base},  2 * 8",
+            "fld.d $f3,  {base},  3 * 8",
+            "fld.d $f4,  {base},  4 * 8",
+            "fld.d $f5,  {base},  5 * 8",
+            "fld.d $f6,  {base},  6 * 8",
+            "fld.d $f7,  {base},  7 * 8",
+            "fld.d $f8,  {base},  8 * 8",
+            "fld.d $f9,  {base},  9 * 8",
+            "fld.d $f10, {base}, 10 * 8",
+            "fld.d $f11, {base}, 11 * 8",
+            "fld.d $f12, {base}, 12 * 8",
+            "fld.d $f13, {base}, 13 * 8",
+            "fld.d $f14, {base}, 14 * 8",
+            "fld.d $f15, {base}, 15 * 8",
+            "fld.d $f16, {base}, 16 * 8",
+            "fld.d $f17, {base}, 17 * 8",
+            "fld.d $f18, {base}, 18 * 8",
+            "fld.d $f19, {base}, 19 * 8",
+            "fld.d $f20, {base}, 20 * 8",
+            "fld.d $f21, {base}, 21 * 8",
+            "fld.d $f22, {base}, 22 * 8",
+            "fld.d $f23, {base}, 23 * 8",
+            "fld.d $f24, {base}, 24 * 8",
+            "fld.d $f25, {base}, 25 * 8",
+            "fld.d $f26, {base}, 26 * 8",
+            "fld.d $f27, {base}, 27 * 8",
+            "fld.d $f28, {base}, 28 * 8",
+            "fld.d $f29, {base}, 29 * 8",
+            "fld.d $f30, {base}, 30 * 8",
+            "fld.d $f31, {base}, 31 * 8",
+            "",
+            "bstrpick.d  {tmp}, {fcc}, 7, 0",
+            "movgr2cf    $fcc0, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 15, 8",
+            "movgr2cf    $fcc1, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 23, 16",
+            "movgr2cf    $fcc2, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 31, 24",
+            "movgr2cf    $fcc3, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 39, 32",
+            "movgr2cf    $fcc4, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 47, 40",
+            "movgr2cf    $fcc5, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 55, 48",
+            "movgr2cf    $fcc6, {tmp}",
+            "bstrpick.d  {tmp}, {fcc}, 63, 56",
+            "movgr2cf    $fcc7, {tmp}",
+            "",
+            "movgr2fcsr $fcsr0, {fcsr}",
+            base = in(reg) &raw mut self.fx,
+            fcc = in(reg) self.fcc,
+            fcsr = in(reg) self.fcsr,
+            tmp = out(reg) _,
+            options(nostack, preserves_flags));
+        }
+    }
+}

+ 96 - 0
crates/eonix_hal/src/arch/loongarch64/link.x

@@ -0,0 +1,96 @@
+SECTIONS {
+    .bootstrap ORIGIN(RAM) :
+    {
+        /* This needs to be aligned to PAGE_SIZE boundaries. */
+        KEEP(*(.bootstrap.tlb_fill_entry));
+
+        KEEP(*(.bootstrap.entry .bootstrap.data));
+
+        . = ORIGIN(RAM) + 0x1000;
+        KEEP(*(.bootstrap.page_table.1));
+        KEEP(*(.bootstrap.page_table.2));
+
+        . = ALIGN(16);
+        KEEP(*(.bootstrap.stack));
+    } > RAM
+
+    __kernel_start = ORIGIN(RAM);
+}
+INSERT BEFORE .text;
+
+SECTIONS {
+    .text.syscall_fns :
+    {
+
+        KEEP(*(.syscall_fns*));
+
+    } > REGION_TEXT AT> RAM
+}
+INSERT AFTER .text;
+
+SECTIONS {
+    .percpu : ALIGN(16)
+    {
+        __spercpu = .;
+
+        PERCPU_DATA_START = .;
+
+        . = ALIGN(16);
+
+        *(.percpu .percpu*);
+
+        . = ALIGN(16);
+        __epercpu = .;
+    } > REGION_RODATA AT> RAM
+
+    PERCPU_LENGTH = ABSOLUTE(__epercpu - __spercpu);
+
+    KIMAGE_PAGES = (__kernel_end - _stext + 0x1000 - 1) / 0x1000;
+    KIMAGE_32K_COUNT = (KIMAGE_PAGES + 8 - 1) / 8;
+
+    BSS_LENGTH = ABSOLUTE(__ebss - __sbss);
+}
+INSERT AFTER .rodata;
+
+SECTIONS {
+    .rodata.syscalls :
+    {
+        . = ALIGN(16);
+        __raw_syscall_handlers_start = .;
+
+        RAW_SYSCALL_HANDLERS = .;
+        KEEP(*(.raw_syscalls*));
+
+        __raw_syscall_handlers_end = .;
+
+        RAW_SYSCALL_HANDLERS_SIZE =
+            ABSOLUTE(__raw_syscall_handlers_end - __raw_syscall_handlers_start);
+    } > REGION_RODATA AT> RAM
+}
+INSERT AFTER .rodata;
+
+SECTIONS {
+    .rodata.fixups :
+    {
+        . = ALIGN(16);
+        FIX_START = .;
+
+        KEEP(*(.fix));
+
+        FIX_END = .;
+    } > REGION_RODATA AT> RAM
+}
+INSERT AFTER .rodata;
+
+SECTIONS {
+    .vdso ALIGN(0x1000) : ALIGN(0x1000)
+    {
+        KEEP(*(.vdso .vdso.*));
+
+        . = ALIGN(0x1000);
+    } > VDSO AT> RAM
+
+    VDSO_PADDR = LOADADDR(.vdso);
+    __kernel_end = ABSOLUTE(LOADADDR(.vdso) + SIZEOF(.vdso));
+}
+INSERT BEFORE .bss;

+ 23 - 0
crates/eonix_hal/src/arch/loongarch64/memory.x

@@ -0,0 +1,23 @@
+OUTPUT_ARCH(loongarch64)
+ENTRY(_start)
+
+MEMORY {
+    RAM    : org = 0x0000000080000000, len = 8M
+    VDSO   : org = 0x00007f0000000000, len = 4K
+    KBSS   : org = 0xffffffff40000000, len = 2M
+    KIMAGE : org = 0xffffffff80000000, len = 8M
+}
+
+REGION_ALIAS("REGION_TEXT", KIMAGE);
+REGION_ALIAS("REGION_RODATA", KIMAGE);
+REGION_ALIAS("REGION_DATA", KIMAGE);
+REGION_ALIAS("REGION_BSS", KBSS);
+REGION_ALIAS("REGION_EHFRAME", KIMAGE);
+
+REGION_ALIAS("LINK_REGION_TEXT", RAM);
+REGION_ALIAS("LINK_REGION_RODATA", RAM);
+REGION_ALIAS("LINK_REGION_DATA", RAM);
+REGION_ALIAS("LINK_REGION_BSS", RAM);
+REGION_ALIAS("LINK_REGION_EHFRAME", RAM);
+
+_stext = ORIGIN(REGION_TEXT) + LOADADDR(.text) - ORIGIN(RAM);

+ 316 - 0
crates/eonix_hal/src/arch/loongarch64/mm.rs

@@ -0,0 +1,316 @@
+use crate::traits::mm::Memory;
+use core::{
+    arch::asm,
+    marker::PhantomData,
+    ptr::NonNull,
+    sync::atomic::{compiler_fence, Ordering},
+};
+use eonix_mm::{
+    address::{Addr as _, AddrOps, PAddr, PRange, PhysAccess, VAddr},
+    page_table::{
+        PageAttribute, PageTable, PageTableLevel, PagingMode, RawAttribute, RawPageTable,
+        TableAttribute, PTE,
+    },
+    paging::{NoAlloc, Page, PageBlock, PAGE_SIZE, PFN},
+};
+use eonix_sync_base::LazyLock;
+use loongArch64::register::pgdl;
+
+pub const KIMAGE_OFFSET: usize = 0xffff_ffff_0000_0000;
+pub const ROOT_PAGE_TABLE_PFN: usize = 0x8000_1000 >> 12;
+pub const PAGE_TABLE_BASE: PFN = PFN::from_val(ROOT_PAGE_TABLE_PFN);
+pub static GLOBAL_PAGE_TABLE: LazyLock<PageTable<ArchPagingMode, NoAlloc, ArchPhysAccess>> =
+    LazyLock::new(|| unsafe {
+        Page::with_raw(PAGE_TABLE_BASE, |root_table_page| {
+            PageTable::with_root_table(root_table_page.clone())
+        })
+    });
+
+pub const PA_VP: u64 = ((1 << 0) | (1 << 7));
+pub const PA_D: u64 = 1 << 1;
+pub const PA_U: u64 = 3 << 2;
+pub const PA_CACHED: u64 = 1 << 4;
+pub const PA_G: u64 = 1 << 6;
+pub const PA_W: u64 = 1 << 8;
+pub const PA_NR: u64 = 1 << 61;
+pub const PA_NX: u64 = 1 << 62;
+
+// in RSW
+pub const PA_COW: u64 = 1 << 9;
+pub const PA_MMAP: u64 = 1 << 10;
+
+pub const PA_PT_USER: u64 = 1 << 59;
+pub const PA_PT: u64 = 1 << 60;
+
+pub const PA_FLAGS_MASK: u64 = 0xF800_0000_0000_0FFF;
+
+#[repr(transparent)]
+#[derive(Clone, Copy)]
+pub struct PTE64(u64);
+
+#[derive(Clone, Copy)]
+pub struct PageAttribute64(u64);
+
+pub struct RawPageTable48<'a>(NonNull<PTE64>, PhantomData<&'a ()>);
+
+pub struct PagingMode48;
+
+pub struct ArchPhysAccess;
+
+pub struct ArchMemory;
+
+impl PTE for PTE64 {
+    type Attr = PageAttribute64;
+
+    fn set(&mut self, pfn: PFN, attr: Self::Attr) {
+        let pfn = ((usize::from(pfn) as u64) << 12) & !PA_FLAGS_MASK;
+        self.0 = pfn | attr.0;
+    }
+
+    fn get(&self) -> (PFN, Self::Attr) {
+        let pfn = PFN::from((self.0 & !PA_FLAGS_MASK) as usize >> 12);
+        let attr = PageAttribute64(self.0 & PA_FLAGS_MASK);
+        (pfn, attr)
+    }
+}
+
+impl PagingMode for PagingMode48 {
+    type Entry = PTE64;
+    type RawTable<'a> = RawPageTable48<'a>;
+    const LEVELS: &'static [PageTableLevel] = &[
+        PageTableLevel::new(39, 9),
+        PageTableLevel::new(30, 9),
+        PageTableLevel::new(21, 9),
+        PageTableLevel::new(12, 9),
+    ];
+}
+
+pub type ArchPagingMode = PagingMode48;
+
+impl<'a> RawPageTable<'a> for RawPageTable48<'a> {
+    type Entry = PTE64;
+
+    fn index(&self, index: u16) -> &'a Self::Entry {
+        unsafe { self.0.add(index as usize).as_ref() }
+    }
+
+    fn index_mut(&mut self, index: u16) -> &'a mut Self::Entry {
+        unsafe { self.0.add(index as usize).as_mut() }
+    }
+
+    unsafe fn from_ptr(ptr: NonNull<PageBlock>) -> Self {
+        Self(ptr.cast(), PhantomData)
+    }
+}
+
+impl RawAttribute for PageAttribute64 {
+    fn null() -> Self {
+        Self(0)
+    }
+
+    fn as_table_attr(self) -> Option<TableAttribute> {
+        let mut table_attr = TableAttribute::empty();
+
+        if self.0 & PA_PT == PA_PT {
+            table_attr |= TableAttribute::PRESENT;
+        }
+
+        if self.0 & PA_PT_USER == PA_PT_USER {
+            table_attr |= TableAttribute::USER;
+        }
+
+        Some(table_attr)
+    }
+
+    fn as_page_attr(self) -> Option<PageAttribute> {
+        let mut page_attr = PageAttribute::empty();
+
+        if self.0 & PA_PT == PA_PT {
+            return None;
+        }
+
+        if self.0 & PA_VP == PA_VP {
+            page_attr |= PageAttribute::PRESENT;
+        }
+
+        if self.0 & PA_NR == 0 {
+            page_attr |= PageAttribute::READ;
+        }
+
+        if self.0 & PA_W != 0 {
+            page_attr |= PageAttribute::WRITE;
+        }
+
+        if self.0 & PA_NX == 0 {
+            page_attr |= PageAttribute::EXECUTE;
+        }
+
+        if self.0 & PA_U == PA_U {
+            page_attr |= PageAttribute::USER;
+        }
+
+        if self.0 & PA_D != 0 {
+            page_attr |= PageAttribute::DIRTY;
+        }
+
+        if self.0 & PA_G != 0 {
+            page_attr |= PageAttribute::GLOBAL;
+        }
+
+        if self.0 & PA_COW != 0 {
+            page_attr |= PageAttribute::COPY_ON_WRITE;
+        }
+
+        if self.0 & PA_MMAP != 0 {
+            page_attr |= PageAttribute::MAPPED;
+        }
+
+        Some(page_attr)
+    }
+}
+
+impl From<PageAttribute> for PageAttribute64 {
+    fn from(page_attr: PageAttribute) -> Self {
+        let mut raw_attr = PA_NR | PA_NX | PA_CACHED;
+
+        for attr in page_attr.iter() {
+            match attr {
+                PageAttribute::PRESENT => raw_attr |= PA_VP,
+                PageAttribute::READ => raw_attr &= !PA_NR,
+                PageAttribute::WRITE => raw_attr |= PA_W,
+                PageAttribute::EXECUTE => raw_attr &= !PA_NX,
+                PageAttribute::USER => raw_attr |= PA_U,
+                PageAttribute::DIRTY => raw_attr |= PA_D,
+                PageAttribute::GLOBAL => raw_attr |= PA_G,
+                PageAttribute::COPY_ON_WRITE => raw_attr |= PA_COW,
+                PageAttribute::MAPPED => raw_attr |= PA_MMAP,
+                PageAttribute::ACCESSED | PageAttribute::ANONYMOUS => {}
+                _ => unreachable!("Invalid page attribute"),
+            }
+        }
+
+        Self(raw_attr)
+    }
+}
+
+impl From<TableAttribute> for PageAttribute64 {
+    fn from(table_attr: TableAttribute) -> Self {
+        let mut raw_attr = 0;
+
+        for attr in table_attr.iter() {
+            match attr {
+                TableAttribute::PRESENT => raw_attr |= PA_PT,
+                TableAttribute::USER => raw_attr |= PA_PT_USER,
+                TableAttribute::GLOBAL | TableAttribute::ACCESSED => {}
+                _ => unreachable!("Invalid table attribute"),
+            }
+        }
+
+        Self(raw_attr)
+    }
+}
+
+impl ArchPhysAccess {
+    const PHYS_OFFSET: usize = 0xffff_ff00_0000_0000;
+}
+
+impl PhysAccess for ArchPhysAccess {
+    unsafe fn as_ptr<T>(paddr: PAddr) -> NonNull<T> {
+        let alignment: usize = align_of::<T>();
+        assert!(paddr.addr() % alignment == 0, "Alignment error");
+
+        unsafe {
+            // SAFETY: We can assume that `paddr.addr()` never equals
+            //         `-PHYS_OFFSET`. Otherwise, the kernel might be broken.
+            NonNull::new_unchecked((Self::PHYS_OFFSET + paddr.addr()) as *mut T)
+        }
+    }
+
+    unsafe fn from_ptr<T>(ptr: NonNull<T>) -> PAddr {
+        let addr = ptr.addr().get();
+
+        assert!(addr % align_of::<T>() == 0, "Alignment error");
+        assert!(
+            addr >= Self::PHYS_OFFSET,
+            "Address is not a valid physical address"
+        );
+
+        PAddr::from_val(addr - Self::PHYS_OFFSET)
+    }
+}
+
+impl Memory for ArchMemory {
+    fn present_ram() -> impl Iterator<Item = PRange> {
+        let range1 = core::iter::once(PRange::from(PAddr::from_val(0)).grow(0x1000_0000));
+        let range2 = core::iter::once(PRange::from(PAddr::from_val(0x8000_0000)).grow(0x3000_0000));
+
+        range2.chain(range1)
+    }
+
+    fn free_ram() -> impl Iterator<Item = PRange> {
+        unsafe extern "C" {
+            fn __kernel_start();
+            fn __kernel_end();
+        }
+
+        let kernel_start = PAddr::from(__kernel_start as usize);
+        let kernel_end = PAddr::from(__kernel_end as usize);
+        let paddr_after_kimage_aligned = kernel_end.ceil_to(PAGE_SIZE);
+
+        Self::present_ram()
+            .filter(move |range| {
+                range.end() <= kernel_start || range.end() > paddr_after_kimage_aligned
+            })
+            .map(move |range| {
+                if range.end() > paddr_after_kimage_aligned
+                    && range.start() < paddr_after_kimage_aligned
+                {
+                    let (_, right) = range.split_at(paddr_after_kimage_aligned);
+                    right
+                } else {
+                    range
+                }
+            })
+    }
+}
+
+pub type DefaultPagingMode = PagingMode48;
+
+#[inline(always)]
+pub fn flush_tlb(vaddr: usize) {
+    unsafe {
+        asm!(
+            "dbar 0x0",
+            "invtlb 0x5, $zero, {vaddr}",
+            vaddr = in(reg) vaddr,
+        );
+    }
+}
+
+#[inline(always)]
+pub fn flush_tlb_all() {
+    unsafe {
+        asm!("dbar 0x0", "invtlb 0x0, $zero, $zero");
+    }
+}
+
+#[inline(always)]
+pub fn get_root_page_table_pfn() -> PFN {
+    PFN::from(PAddr::from(pgdl::read().base()))
+}
+
+#[inline(always)]
+pub fn set_root_page_table_pfn(pfn: PFN) {
+    compiler_fence(Ordering::SeqCst);
+
+    unsafe {
+        pgdl::set_base(PAddr::from(pfn).addr());
+    }
+
+    compiler_fence(Ordering::SeqCst);
+
+    // Invalidate all user space TLB entries.
+    unsafe {
+        asm!("dbar 0x0", "invtlb 0x0, $zero, $zero");
+    }
+}

+ 8 - 0
crates/eonix_hal/src/arch/loongarch64/mod.rs

@@ -0,0 +1,8 @@
+pub mod bootstrap;
+pub mod context;
+pub mod cpu;
+pub mod fdt;
+pub mod fence;
+pub mod fpu;
+pub mod mm;
+pub mod trap;

+ 348 - 0
crates/eonix_hal/src/arch/loongarch64/trap/mod.rs

@@ -0,0 +1,348 @@
+mod trap_context;
+
+use super::context::TaskContext;
+use core::arch::{asm, global_asm, naked_asm};
+use core::cell::UnsafeCell;
+use core::mem::{offset_of, size_of};
+use core::num::NonZero;
+use core::ptr::NonNull;
+use eonix_hal_traits::{
+    context::RawTaskContext,
+    trap::{IrqState as IrqStateTrait, TrapReturn},
+};
+use loongArch64::register::crmd::{self, Crmd};
+use loongArch64::register::ecfg;
+use loongArch64::register::eentry::{self, Eentry};
+
+pub use trap_context::*;
+
+pub const CSR_KERNEL_TP: usize = 0x30;
+const CSR_CAPTURED_TRAP_CONTEXT_ADDR: usize = 0x31;
+const CSR_CAPTURER_TASK_CONTEXT_ADDR: usize = 0x32;
+const CSR_T0: usize = 0x33;
+const CSR_T1: usize = 0x34;
+
+#[unsafe(naked)]
+unsafe extern "C" fn _raw_trap_entry() -> ! {
+    naked_asm!(
+        // Page alignment is required for trap entry points
+        ".align 12",
+        "csrwr  $t0,  {CSR_T0}",
+        "csrwr  $t1,  {CSR_T1}",
+        "csrrd  $t0,  {CSR_CAPTURED_TRAP_CONTEXT_ADDR}",
+        "move   $t1,  $sp",
+        "bnez   $t0,  2f",
+        // We came here from normal execution
+        "li.d   $t0, -16",
+        "and    $t0,  $t0, $sp",
+        "addi.d $t0,  $t0, -{trap_context_size}",
+        "move   $sp,  $t0",
+        // t0: &mut TrapContext
+        "2:",
+        "st.d   $ra,  $t0, {ra}",
+        "st.d   $tp,  $t0, {tp}",
+        "st.d   $t1,  $t0, {sp}", // $sp is saved in $t1
+        "csrrd  $ra,  {CSR_T0}", // Put old $t0 into $ra
+        "csrrd  $tp,  {CSR_T1}", // Put old $t1 into $tp
+        "st.d   $a0,  $t0, {a0}",
+        "st.d   $a1,  $t0, {a1}",
+        "st.d   $a2,  $t0, {a2}",
+        "st.d   $a3,  $t0, {a3}",
+        "st.d   $a4,  $t0, {a4}",
+        "st.d   $a5,  $t0, {a5}",
+        "st.d   $a6,  $t0, {a6}",
+        "st.d   $a7,  $t0, {a7}",
+        "st.d   $ra,  $t0, {t0}", // $t0 is saved in $ra
+        "st.d   $tp,  $t0, {t1}", // $t1 is saved in $tp
+        "st.d   $t2,  $t0, {t2}",
+        "st.d   $t3,  $t0, {t3}",
+        "st.d   $t4,  $t0, {t4}",
+        "st.d   $t5,  $t0, {t5}",
+        "st.d   $t6,  $t0, {t6}",
+        "st.d   $t7,  $t0, {t7}",
+        "st.d   $t8,  $t0, {t8}",
+        "st.d   $r21, $t0, {u0}",
+        "st.d   $fp,  $t0, {fp}",
+        "csrrd  $tp,  {CSR_KERNEL_TP}",
+        "csrrd  $t1,  {CSR_ESTAT}",
+        "csrrd  $t2,  {CSR_PRMD}",
+        "csrrd  $ra,  {CSR_ERA}",
+        "csrrd  $a1,  {CSR_CAPTURER_TASK_CONTEXT_ADDR}",
+        "st.d   $s0,  $t0, {s0}",
+        "st.d   $s1,  $t0, {s1}",
+        "st.d   $s2,  $t0, {s2}",
+        "st.d   $s3,  $t0, {s3}",
+        "st.d   $s4,  $t0, {s4}",
+        "st.d   $s5,  $t0, {s5}",
+        "st.d   $s6,  $t0, {s6}",
+        "st.d   $s7,  $t0, {s7}",
+        "st.d   $s8,  $t0, {s8}",
+        "st.d   $t1,  $t0, {estat}",
+        "st.d   $t2,  $t0, {prmd}",
+        "st.d   $ra,  $t0, {era}",
+        "bnez   $a1,  {captured_trap_handler}",
+        "b      {default_trap_handler}",
+        CSR_KERNEL_TP = const CSR_KERNEL_TP,
+        CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR,
+        CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR,
+        CSR_T0 = const CSR_T0,
+        CSR_T1 = const CSR_T1,
+        CSR_ESTAT = const 0x5,
+        CSR_PRMD = const 0x1,
+        CSR_ERA = const 0x6,
+        trap_context_size = const size_of::<TrapContext>(),
+        ra = const Registers::OFFSET_RA,
+        tp = const Registers::OFFSET_TP,
+        sp = const Registers::OFFSET_SP,
+        a0 = const Registers::OFFSET_A0,
+        a1 = const Registers::OFFSET_A1,
+        a2 = const Registers::OFFSET_A2,
+        a3 = const Registers::OFFSET_A3,
+        a4 = const Registers::OFFSET_A4,
+        a5 = const Registers::OFFSET_A5,
+        a6 = const Registers::OFFSET_A6,
+        a7 = const Registers::OFFSET_A7,
+        t0 = const Registers::OFFSET_T0,
+        t1 = const Registers::OFFSET_T1,
+        t2 = const Registers::OFFSET_T2,
+        t3 = const Registers::OFFSET_T3,
+        t4 = const Registers::OFFSET_T4,
+        t5 = const Registers::OFFSET_T5,
+        t6 = const Registers::OFFSET_T6,
+        t7 = const Registers::OFFSET_T7,
+        t8 = const Registers::OFFSET_T8,
+        u0 = const Registers::OFFSET_U0,
+        fp = const Registers::OFFSET_FP,
+        s0 = const Registers::OFFSET_S0,
+        s1 = const Registers::OFFSET_S1,
+        s2 = const Registers::OFFSET_S2,
+        s3 = const Registers::OFFSET_S3,
+        s4 = const Registers::OFFSET_S4,
+        s5 = const Registers::OFFSET_S5,
+        s6 = const Registers::OFFSET_S6,
+        s7 = const Registers::OFFSET_S7,
+        s8 = const Registers::OFFSET_S8,
+        estat = const TrapContext::OFFSET_ESTAT,
+        prmd = const TrapContext::OFFSET_PRMD,
+        era = const TrapContext::OFFSET_ERA,
+        captured_trap_handler = sym captured_trap_handler,
+        default_trap_handler = sym default_trap_handler,
+    );
+}
+
+#[unsafe(naked)]
+unsafe extern "C" fn _raw_trap_return(ctx: &mut TrapContext) -> ! {
+    naked_asm!(
+        "ld.d  $ra,  $s8, {ra}",
+        "ld.d  $tp,  $s8, {tp}",
+        "ld.d  $sp,  $s8, {sp}",
+        "ld.d  $a0,  $s8, {a0}",
+        "ld.d  $a1,  $s8, {a1}",
+        "ld.d  $a2,  $s8, {a2}",
+        "ld.d  $a3,  $s8, {a3}",
+        "ld.d  $a4,  $s8, {a4}",
+        "ld.d  $a5,  $s8, {a5}",
+        "ld.d  $a6,  $s8, {a6}",
+        "ld.d  $a7,  $s8, {a7}",
+        "ld.d  $t0,  $s8, {t0}",
+        "ld.d  $t1,  $s8, {t1}",
+        "ld.d  $t2,  $s8, {t2}",
+        "ld.d  $t3,  $s8, {t3}",
+        "ld.d  $t4,  $s8, {t4}",
+        "ld.d  $t5,  $s8, {t5}",
+        "ld.d  $t6,  $s8, {t6}",
+        "ld.d  $t7,  $s8, {t7}",
+        "ld.d  $t8,  $s8, {t8}",
+        "ld.d  $r21, $s8, {u0}",
+        "ld.d  $fp,  $s8, {fp}",
+        "ld.d  $s6,  $s8, {prmd}",
+        "ld.d  $s7,  $s8, {era}",
+        "ld.d  $s0,  $s8, {s0}",
+        "ld.d  $s1,  $s8, {s1}",
+        "ld.d  $s2,  $s8, {s2}",
+        "ld.d  $s3,  $s8, {s3}",
+        "ld.d  $s4,  $s8, {s4}",
+        "ld.d  $s5,  $s8, {s5}",
+        "csrwr $s6,  {CSR_PRMD}",
+        "csrwr $s7,  {CSR_ERA}",
+        "ld.d  $s6,  $s8, {s6}",
+        "ld.d  $s7,  $s8, {s7}",
+        "ld.d  $s8,  $s8, {s8}",
+        "ertn",
+        CSR_PRMD = const 0x1,
+        CSR_ERA = const 0x6,
+        ra = const Registers::OFFSET_RA,
+        tp = const Registers::OFFSET_TP,
+        sp = const Registers::OFFSET_SP,
+        a0 = const Registers::OFFSET_A0,
+        a1 = const Registers::OFFSET_A1,
+        a2 = const Registers::OFFSET_A2,
+        a3 = const Registers::OFFSET_A3,
+        a4 = const Registers::OFFSET_A4,
+        a5 = const Registers::OFFSET_A5,
+        a6 = const Registers::OFFSET_A6,
+        a7 = const Registers::OFFSET_A7,
+        t0 = const Registers::OFFSET_T0,
+        t1 = const Registers::OFFSET_T1,
+        t2 = const Registers::OFFSET_T2,
+        t3 = const Registers::OFFSET_T3,
+        t4 = const Registers::OFFSET_T4,
+        t5 = const Registers::OFFSET_T5,
+        t6 = const Registers::OFFSET_T6,
+        t7 = const Registers::OFFSET_T7,
+        t8 = const Registers::OFFSET_T8,
+        u0 = const Registers::OFFSET_U0,
+        fp = const Registers::OFFSET_FP,
+        s0 = const Registers::OFFSET_S0,
+        s1 = const Registers::OFFSET_S1,
+        s2 = const Registers::OFFSET_S2,
+        s3 = const Registers::OFFSET_S3,
+        s4 = const Registers::OFFSET_S4,
+        s5 = const Registers::OFFSET_S5,
+        s6 = const Registers::OFFSET_S6,
+        s7 = const Registers::OFFSET_S7,
+        s8 = const Registers::OFFSET_S8,
+        prmd = const TrapContext::OFFSET_PRMD,
+        era = const TrapContext::OFFSET_ERA,
+    );
+}
+
+#[unsafe(naked)]
+unsafe extern "C" fn default_trap_handler() {
+    unsafe extern "C" {
+        fn _default_trap_handler(trap_context: &mut TrapContext);
+    }
+
+    #[cfg(debug_assertions)]
+    naked_asm!(
+        ".cfi_startproc",
+        ".cfi_signal_frame",
+        "move $s8, $t0",
+        "move $a0, $t0",
+        "",
+        ".cfi_register $ra, $s7",
+        "move $s7, $ra",
+        "",
+        "bl   {default_handler}",
+        "",
+        "b    {trap_return}",
+        "",
+        ".cfi_endproc",
+        default_handler = sym _default_trap_handler,
+        trap_return = sym _raw_trap_return,
+    );
+
+    #[cfg(not(debug_assertions))]
+    naked_asm!(
+        "move $s8, $t0",
+        "move $a0, $t0",
+        "",
+        "bl   {default_handler}",
+        "b    {trap_return}",
+        default_handler = sym _default_trap_handler,
+        trap_return = sym _raw_trap_return,
+    );
+}
+
+static DIRTY_TASK_CONTEXT: TaskContext = unsafe { core::mem::zeroed() };
+
+#[unsafe(naked)]
+unsafe extern "C" fn captured_trap_handler() {
+    naked_asm!(
+        "la.global $a0, {dirty_task_context}",
+        "b         {switch}",
+        dirty_task_context = sym DIRTY_TASK_CONTEXT,
+        switch = sym TaskContext::switch,
+    );
+}
+
+#[unsafe(naked)]
+unsafe extern "C" fn captured_trap_return(trap_context: usize) -> ! {
+    naked_asm!(
+        "move $s8, $sp",
+        "b    {raw_trap_return}",
+        raw_trap_return = sym _raw_trap_return,
+    );
+}
+
+impl TrapReturn for TrapContext {
+    type TaskContext = TaskContext;
+
+    unsafe fn trap_return(&mut self) {
+        let irq_states = disable_irqs_save();
+
+        let mut capturer_ctx = TaskContext::new();
+        let mut to_ctx = TaskContext::new();
+        to_ctx.set_program_counter(captured_trap_return as usize);
+        to_ctx.set_stack_pointer(&raw mut *self as usize);
+        to_ctx.set_interrupt_enabled(false);
+
+        unsafe {
+            asm!(
+                "csrwr {captured_trap_context}, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}",
+                "csrwr {capturer_task_context}, {CSR_CAPTURER_TASK_CONTEXT_ADDR}",
+                captured_trap_context = inout(reg) &raw mut *self => _,
+                capturer_task_context = inout(reg) &raw mut capturer_ctx => _,
+                CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR,
+                CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR,
+                options(nomem, nostack, preserves_flags),
+            );
+
+            TaskContext::switch(&mut capturer_ctx, &mut to_ctx);
+
+            asm!(
+                "csrwr $zero, {CSR_CAPTURED_TRAP_CONTEXT_ADDR}",
+                "csrwr $zero, {CSR_CAPTURER_TASK_CONTEXT_ADDR}",
+                CSR_CAPTURED_TRAP_CONTEXT_ADDR = const CSR_CAPTURED_TRAP_CONTEXT_ADDR,
+                CSR_CAPTURER_TASK_CONTEXT_ADDR = const CSR_CAPTURER_TASK_CONTEXT_ADDR,
+                options(nomem, nostack, preserves_flags),
+            )
+        }
+
+        irq_states.restore();
+    }
+}
+
+fn setup_trap_handler(trap_entry_addr: usize) {
+    ecfg::set_vs(0);
+    eentry::set_eentry(trap_entry_addr);
+}
+
+pub fn setup_trap() {
+    setup_trap_handler(_raw_trap_entry as usize);
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct IrqState(bool);
+
+impl IrqState {
+    #[inline]
+    pub fn save() -> Self {
+        IrqState(crmd::read().ie())
+    }
+}
+
+impl IrqStateTrait for IrqState {
+    fn restore(self) {
+        let Self(state) = self;
+        crmd::set_ie(state)
+    }
+}
+
+#[inline]
+pub fn disable_irqs() {
+    crmd::set_ie(false);
+}
+
+#[inline]
+pub fn enable_irqs() {
+    crmd::set_ie(true);
+}
+
+#[inline]
+pub fn disable_irqs_save() -> IrqState {
+    let state = IrqState::save();
+    disable_irqs();
+
+    state
+}

+ 297 - 0
crates/eonix_hal/src/arch/loongarch64/trap/trap_context.rs

@@ -0,0 +1,297 @@
+use crate::processor::CPU;
+use core::{arch::asm, mem::offset_of};
+use eonix_hal_traits::{
+    fault::{Fault, PageFaultErrorCode},
+    trap::{RawTrapContext, TrapType},
+};
+use eonix_mm::address::VAddr;
+use loongArch64::register::{
+    badv,
+    estat::{Estat, Exception, Interrupt, Trap},
+    ticlr,
+};
+
+#[repr(C)]
+#[derive(Default, Clone, Copy)]
+pub struct Registers {
+    ra: u64,
+    tp: u64,
+    sp: u64,
+    a0: u64,
+    a1: u64,
+    a2: u64,
+    a3: u64,
+    a4: u64,
+    a5: u64,
+    a6: u64,
+    a7: u64,
+    t0: u64,
+    t1: u64,
+    t2: u64,
+    t3: u64,
+    t4: u64,
+    t5: u64,
+    t6: u64,
+    t7: u64,
+    t8: u64,
+    u0: u64,
+    fp: u64,
+    s0: u64,
+    s1: u64,
+    s2: u64,
+    s3: u64,
+    s4: u64,
+    s5: u64,
+    s6: u64,
+    s7: u64,
+    s8: u64,
+}
+
+/// Saved CPU context when a trap (interrupt or exception) occurs on LoongArch64.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct TrapContext {
+    regs: Registers,
+    estat: Estat,
+    prmd: usize,
+    era: usize,
+}
+
+impl Registers {
+    pub const OFFSET_RA: usize = offset_of!(Registers, ra);
+    pub const OFFSET_TP: usize = offset_of!(Registers, tp);
+    pub const OFFSET_SP: usize = offset_of!(Registers, sp);
+    pub const OFFSET_A0: usize = offset_of!(Registers, a0);
+    pub const OFFSET_A1: usize = offset_of!(Registers, a1);
+    pub const OFFSET_A2: usize = offset_of!(Registers, a2);
+    pub const OFFSET_A3: usize = offset_of!(Registers, a3);
+    pub const OFFSET_A4: usize = offset_of!(Registers, a4);
+    pub const OFFSET_A5: usize = offset_of!(Registers, a5);
+    pub const OFFSET_A6: usize = offset_of!(Registers, a6);
+    pub const OFFSET_A7: usize = offset_of!(Registers, a7);
+    pub const OFFSET_T0: usize = offset_of!(Registers, t0);
+    pub const OFFSET_T1: usize = offset_of!(Registers, t1);
+    pub const OFFSET_T2: usize = offset_of!(Registers, t2);
+    pub const OFFSET_T3: usize = offset_of!(Registers, t3);
+    pub const OFFSET_T4: usize = offset_of!(Registers, t4);
+    pub const OFFSET_T5: usize = offset_of!(Registers, t5);
+    pub const OFFSET_T6: usize = offset_of!(Registers, t6);
+    pub const OFFSET_T7: usize = offset_of!(Registers, t7);
+    pub const OFFSET_T8: usize = offset_of!(Registers, t8);
+    pub const OFFSET_U0: usize = offset_of!(Registers, u0);
+    pub const OFFSET_FP: usize = offset_of!(Registers, fp);
+    pub const OFFSET_S0: usize = offset_of!(Registers, s0);
+    pub const OFFSET_S1: usize = offset_of!(Registers, s1);
+    pub const OFFSET_S2: usize = offset_of!(Registers, s2);
+    pub const OFFSET_S3: usize = offset_of!(Registers, s3);
+    pub const OFFSET_S4: usize = offset_of!(Registers, s4);
+    pub const OFFSET_S5: usize = offset_of!(Registers, s5);
+    pub const OFFSET_S6: usize = offset_of!(Registers, s6);
+    pub const OFFSET_S7: usize = offset_of!(Registers, s7);
+    pub const OFFSET_S8: usize = offset_of!(Registers, s8);
+}
+
+type FIrq = fn(handler: fn(irqno: usize));
+type FTimer = fn(handler: fn());
+
+impl TrapContext {
+    pub const OFFSET_ESTAT: usize = offset_of!(TrapContext, estat);
+    pub const OFFSET_PRMD: usize = offset_of!(TrapContext, prmd);
+    pub const OFFSET_ERA: usize = offset_of!(TrapContext, era);
+
+    fn syscall_no(&self) -> usize {
+        self.regs.a7 as usize
+    }
+
+    fn syscall_args(&self) -> [usize; 6] {
+        [
+            self.regs.a0 as usize,
+            self.regs.a1 as usize,
+            self.regs.a2 as usize,
+            self.regs.a3 as usize,
+            self.regs.a4 as usize,
+            self.regs.a5 as usize,
+        ]
+    }
+
+    fn gen_page_fault(&self, mut err_code: PageFaultErrorCode) -> TrapType<FIrq, FTimer> {
+        #[inline(always)]
+        fn get_page_fault_address() -> VAddr {
+            VAddr::from(badv::read().vaddr())
+        }
+
+        err_code.set(PageFaultErrorCode::UserAccess, self.is_user_mode());
+
+        TrapType::Fault(Fault::PageFault {
+            error_code: err_code,
+            address: get_page_fault_address(),
+        })
+    }
+}
+
+impl RawTrapContext for TrapContext {
+    type FIrq = FIrq;
+    type FTimer = FTimer;
+
+    fn new() -> Self {
+        Self {
+            regs: Registers::default(),
+            estat: Estat::from(0),
+            prmd: 0,
+            era: 0,
+        }
+    }
+
+    fn trap_type(&self) -> TrapType<Self::FIrq, Self::FTimer> {
+        match self.estat.cause() {
+            Trap::Interrupt(Interrupt::Timer) => TrapType::Timer {
+                callback: |handler| {
+                    ticlr::clear_timer_interrupt();
+                    handler();
+                },
+            },
+            Trap::Interrupt(interrupt) => match interrupt as usize {
+                2..=7 => TrapType::Irq {
+                    callback: |handler| {
+                        todo!("handle IRQs");
+                        // let mut cpu = CPU::local();
+                        // match cpu.as_mut().interrupt.plic.claim_interrupt() {
+                        //     None => {}
+                        //     Some(irqno) => {
+                        //         cpu.interrupt.plic.complete_interrupt(irqno);
+                        //         handler(irqno);
+                        //     }
+                        // }
+                    },
+                },
+                interrupt => TrapType::Fault(Fault::Unknown(interrupt)),
+            },
+            Trap::Exception(
+                Exception::InstructionPrivilegeIllegal
+                | Exception::FetchInstructionAddressError
+                | Exception::AddressNotAligned
+                | Exception::MemoryAccessAddressError
+                | Exception::PagePrivilegeIllegal,
+            ) => TrapType::Fault(Fault::BadAccess),
+            Trap::Exception(Exception::InstructionNotExist) => TrapType::Fault(Fault::InvalidOp),
+            Trap::Exception(Exception::Syscall) => TrapType::Syscall {
+                no: self.syscall_no(),
+                args: self.syscall_args(),
+            },
+            Trap::Exception(Exception::LoadPageFault | Exception::PageNonReadableFault) => {
+                self.gen_page_fault(PageFaultErrorCode::Read)
+            }
+            Trap::Exception(Exception::StorePageFault | Exception::PageModifyFault) => {
+                self.gen_page_fault(PageFaultErrorCode::Write)
+            }
+            Trap::Exception(Exception::FetchPageFault | Exception::PageNonExecutableFault) => {
+                self.gen_page_fault(PageFaultErrorCode::InstructionFetch)
+            }
+            Trap::Exception(exception) => TrapType::Fault(Fault::Unknown(exception as usize)),
+            Trap::MachineError(_) | Trap::Unknown => todo!(),
+        }
+    }
+
+    fn get_program_counter(&self) -> usize {
+        self.era
+    }
+
+    fn get_stack_pointer(&self) -> usize {
+        self.regs.sp as usize
+    }
+
+    fn set_program_counter(&mut self, pc: usize) {
+        self.era = pc;
+    }
+
+    fn set_stack_pointer(&mut self, sp: usize) {
+        self.regs.sp = sp as u64;
+    }
+
+    fn is_interrupt_enabled(&self) -> bool {
+        self.prmd & (1 << 2) != 0
+    }
+
+    fn set_interrupt_enabled(&mut self, enabled: bool) {
+        match enabled {
+            true => self.prmd |= 1 << 2,
+            false => self.prmd &= !(1 << 2),
+        }
+    }
+
+    fn is_user_mode(&self) -> bool {
+        self.prmd & 0x3 != 0
+    }
+
+    fn set_user_mode(&mut self, user: bool) {
+        match user {
+            true => self.prmd |= 0x3,
+            false => self.prmd &= !0x3,
+        }
+    }
+
+    fn set_user_return_value(&mut self, retval: usize) {
+        self.regs.a0 = retval as u64;
+    }
+
+    fn set_user_call_frame<E>(
+        &mut self,
+        pc: usize,
+        sp: Option<usize>,
+        ra: Option<usize>,
+        args: &[usize],
+        _write_memory: impl Fn(VAddr, &[u8]) -> Result<(), E>,
+    ) -> Result<(), E> {
+        self.set_program_counter(pc);
+
+        if let Some(sp) = sp {
+            self.set_stack_pointer(sp);
+        }
+
+        if let Some(ra) = ra {
+            self.regs.ra = ra as u64;
+        }
+
+        let arg_regs = [
+            &mut self.regs.a0,
+            &mut self.regs.a1,
+            &mut self.regs.a2,
+            &mut self.regs.a3,
+            &mut self.regs.a4,
+            &mut self.regs.a5,
+        ];
+
+        for (&arg, reg) in args.iter().zip(arg_regs.into_iter()) {
+            *reg = arg as u64;
+        }
+
+        Ok(())
+    }
+}
+
+impl TrapContext {
+    fn get_page_fault_error_code(&self, exception: Exception) -> PageFaultErrorCode {
+        let mut error_code = PageFaultErrorCode::empty();
+
+        match exception {
+            Exception::FetchPageFault => {
+                error_code |= PageFaultErrorCode::InstructionFetch;
+            }
+            Exception::LoadPageFault => {
+                error_code |= PageFaultErrorCode::Read;
+            }
+            Exception::StorePageFault => {
+                error_code |= PageFaultErrorCode::Write;
+            }
+            _ => {
+                unreachable!();
+            }
+        }
+
+        if self.is_user_mode() {
+            error_code |= PageFaultErrorCode::UserAccess;
+        }
+
+        error_code
+    }
+}

+ 3 - 0
crates/eonix_hal/src/arch/mod.rs

@@ -9,6 +9,9 @@ cfg_if::cfg_if! {
     } else if #[cfg(target_arch = "riscv64")] {
         pub mod riscv64;
         pub use riscv64::*;
+    } else if #[cfg(target_arch = "loongarch64")] {
+        pub mod loongarch64;
+        pub use loongarch64::*;
     } else {
         compile_error!("Unsupported architecture");
     }

+ 7 - 2
crates/eonix_hal/src/arch/riscv64/bootstrap.rs

@@ -1,7 +1,7 @@
 use super::{
     config::{self, mm::*},
     console::write_str,
-    cpu::CPUID,
+    cpu::{CPUID, CPU_COUNT},
     time::set_next_timer,
     trap::TRAP_SCRATCH,
 };
@@ -127,7 +127,6 @@ pub unsafe extern "C" fn riscv64_start(hart_id: usize, dtb_addr: PAddr) -> ! {
 
     setup_cpu(&alloc, hart_id);
 
-    // TODO: set up interrupt, smp
     ScopedAllocator::new(&mut [0; 1024])
         .with_alloc(|mem_alloc| bootstrap_smp(mem_alloc, &real_allocator));
 
@@ -203,6 +202,8 @@ fn setup_kernel_page_table(alloc: impl PageAlloc) {
 
 /// set up tp register to percpu
 fn setup_cpu(alloc: impl PageAlloc, hart_id: usize) {
+    CPU_COUNT.fetch_add(1, Ordering::Relaxed);
+
     let mut percpu_area = PercpuArea::new(|layout| {
         let page_count = layout.size().div_ceil(PAGE_SIZE);
         let page = Page::alloc_at_least_in(page_count, alloc);
@@ -394,3 +395,7 @@ pub fn early_console_write(s: &str) {
 pub fn early_console_putchar(ch: u8) {
     console_putchar(ch);
 }
+
+pub fn shutdown() -> ! {
+    sbi::legacy::shutdown();
+}

+ 3 - 1
crates/eonix_hal/src/arch/riscv64/cpu.rs

@@ -3,7 +3,7 @@ use super::{
     trap::{setup_trap, TRAP_SCRATCH},
 };
 use crate::arch::fdt::{FdtExt, FDT};
-use core::{arch::asm, pin::Pin, ptr::NonNull};
+use core::{arch::asm, pin::Pin, ptr::NonNull, sync::atomic::AtomicUsize};
 use eonix_preempt::PreemptGuard;
 use eonix_sync_base::LazyLock;
 use riscv::register::{
@@ -12,6 +12,8 @@ use riscv::register::{
 };
 use sbi::PhysicalAddress;
 
+pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(0);
+
 #[eonix_percpu::define_percpu]
 pub static CPUID: usize = 0;
 

+ 1 - 1
crates/eonix_hal/src/lib.rs

@@ -19,7 +19,7 @@ pub mod fpu {
 }
 
 pub mod processor {
-    pub use crate::arch::cpu::{halt, UserTLS, CPU};
+    pub use crate::arch::cpu::{halt, UserTLS, CPU, CPU_COUNT};
 }
 
 /// Re-export the arch module for use in other crates

+ 1 - 1
crates/eonix_hal/src/link.x.in

@@ -3,7 +3,7 @@ PROVIDE(_stext = ORIGIN(REGION_TEXT));
 SECTIONS {
     .text _stext :
     {
-        __kernel_start = .;
+        PROVIDE(__kernel_start = .);
         __stext = .;
 
         *(.text.entry);

+ 1 - 1
crates/eonix_mm/src/page_table/pte.rs

@@ -10,7 +10,7 @@ bitflags! {
         const GLOBAL = 8;
     }
 
-    #[derive(Clone, Copy, PartialEq)]
+    #[derive(Debug, Clone, Copy, PartialEq)]
     pub struct PageAttribute: usize {
         const PRESENT = 1;
         const READ = 2;

+ 23 - 0
crates/eonix_percpu/eonix_percpu_macros/src/lib.rs

@@ -1,5 +1,6 @@
 extern crate proc_macro;
 
+mod loongarch64;
 mod riscv64;
 mod x86_64;
 
@@ -231,3 +232,25 @@ pub fn define_percpu_shared_riscv64(
     )
     .into()
 }
+
+#[proc_macro_attribute]
+pub fn define_percpu_loongarch64(
+    attrs: proc_macro::TokenStream,
+    item: proc_macro::TokenStream,
+) -> proc_macro::TokenStream {
+    define_percpu_impl(attrs.into(), item.into(), loongarch64::get_percpu_pointer).into()
+}
+
+#[proc_macro_attribute]
+pub fn define_percpu_shared_loongarch64(
+    attrs: proc_macro::TokenStream,
+    item: proc_macro::TokenStream,
+) -> proc_macro::TokenStream {
+    define_percpu_shared_impl(
+        attrs.into(),
+        item.into(),
+        loongarch64::get_percpu_pointer,
+        loongarch64::get_percpu_offset,
+    )
+    .into()
+}

+ 51 - 0
crates/eonix_percpu/eonix_percpu_macros/src/loongarch64.rs

@@ -0,0 +1,51 @@
+use proc_macro2::TokenStream;
+use quote::quote;
+use syn::{Ident, Type};
+
+/// Compute the current CPU's address of the given percpu variable
+/// (its offset from `PERCPU_DATA_START`, rebased onto `$tp`).
+pub fn get_percpu_pointer(percpu: &Ident, ty: &Type) -> TokenStream {
+    quote! {
+        {
+            let base: *mut #ty;
+
+            unsafe extern "C" {
+                fn PERCPU_DATA_START();
+            }
+
+            ::core::arch::asm!(
+                "sub.d     {base}, {base}, {start}",
+                "add.d     {base}, {base}, $tp",
+                base = inout(reg) &raw const #percpu => base,
+                start = in(reg) PERCPU_DATA_START as usize,
+                options(nostack, preserves_flags)
+            );
+
+            base
+        }
+    }
+}
+
+pub fn get_percpu_offset(percpu: &Ident) -> TokenStream {
+    quote! {
+        unsafe {
+            let offset: usize;
+
+            unsafe extern "C" {
+                fn PERCPU_DATA_START();
+            }
+
+            ::core::arch::asm!(
+                "la.global {tmp},    {start}",
+                "la.global {output}, {var}",
+                "sub.d     {output}, {output}, {tmp}",
+                start = sym PERCPU_DATA_START,
+                var = sym #percpu,
+                tmp = out(reg) _,
+                output = out(reg) offset,
+                options(nostack, preserves_flags)
+            );
+
+            offset
+        }
+    }
+}

+ 6 - 0
crates/eonix_percpu/src/lib.rs

@@ -18,6 +18,12 @@ pub use eonix_percpu_macros::define_percpu_riscv64 as define_percpu;
 #[cfg(target_arch = "riscv64")]
 pub use eonix_percpu_macros::define_percpu_shared_riscv64 as define_percpu_shared;
 
+#[cfg(target_arch = "loongarch64")]
+pub use eonix_percpu_macros::define_percpu_loongarch64 as define_percpu;
+
+#[cfg(target_arch = "loongarch64")]
+pub use eonix_percpu_macros::define_percpu_shared_loongarch64 as define_percpu_shared;
+
 const MAX_CPUS: usize = 256;
 
 #[repr(align(16))]

+ 1 - 0
crates/posix_types/src/lib.rs

@@ -4,6 +4,7 @@ pub mod constants;
 pub mod ctypes;
 pub mod namei;
 pub mod open;
+pub mod poll;
 pub mod result;
 pub mod signal;
 pub mod stat;

+ 5 - 0
crates/posix_types/src/poll.rs

@@ -0,0 +1,5 @@
+pub const FDSET_LENGTH: usize = 1024 / (8 * size_of::<usize>());
+
+pub struct FDSet {
+    fds_bits: [usize; FDSET_LENGTH],
+}

+ 8 - 0
crates/posix_types/src/stat.rs

@@ -1,3 +1,5 @@
+use core::time::Duration;
+
 #[repr(C)]
 #[derive(Debug, Default, Copy, Clone)]
 pub struct StatXTimestamp {
@@ -100,3 +102,9 @@ impl From<StatX> for Stat {
         }
     }
 }
+
+impl From<TimeSpec> for Duration {
+    fn from(value: TimeSpec) -> Self {
+        Self::new(value.tv_sec, value.tv_nsec)
+    }
+}

+ 3 - 0
crates/posix_types/src/syscall_no.rs

@@ -7,6 +7,9 @@ cfg_if! {
     } else if #[cfg(target_arch = "x86_64")] {
         mod x86_64;
         pub use x86_64::*;
+    } else if #[cfg(target_arch = "loongarch64")] {
+        mod loongarch64;
+        pub use loongarch64::*;
     } else {
         compile_error!("Unsupported architecture for syscall numbers");
     }

+ 319 - 0
crates/posix_types/src/syscall_no/loongarch64.rs

@@ -0,0 +1,319 @@
+pub const SYS_IO_SETUP: usize = 0;
+pub const SYS_IO_DESTROY: usize = 1;
+pub const SYS_IO_SUBMIT: usize = 2;
+pub const SYS_IO_CANCEL: usize = 3;
+pub const SYS_IO_GETEVENTS: usize = 4;
+pub const SYS_SETXATTR: usize = 5;
+pub const SYS_LSETXATTR: usize = 6;
+pub const SYS_FSETXATTR: usize = 7;
+pub const SYS_GETXATTR: usize = 8;
+pub const SYS_LGETXATTR: usize = 9;
+pub const SYS_FGETXATTR: usize = 10;
+pub const SYS_LISTXATTR: usize = 11;
+pub const SYS_LLISTXATTR: usize = 12;
+pub const SYS_FLISTXATTR: usize = 13;
+pub const SYS_REMOVEXATTR: usize = 14;
+pub const SYS_LREMOVEXATTR: usize = 15;
+pub const SYS_FREMOVEXATTR: usize = 16;
+pub const SYS_GETCWD: usize = 17;
+pub const SYS_LOOKUP_DCOOKIE: usize = 18;
+pub const SYS_EVENTFD2: usize = 19;
+pub const SYS_EPOLL_CREATE1: usize = 20;
+pub const SYS_EPOLL_CTL: usize = 21;
+pub const SYS_EPOLL_PWAIT: usize = 22;
+pub const SYS_DUP: usize = 23;
+pub const SYS_DUP3: usize = 24;
+pub const SYS_FCNTL64: usize = 25;
+pub const SYS_INOTIFY_INIT1: usize = 26;
+pub const SYS_INOTIFY_ADD_WATCH: usize = 27;
+pub const SYS_INOTIFY_RM_WATCH: usize = 28;
+pub const SYS_IOCTL: usize = 29;
+pub const SYS_IOPRIO_SET: usize = 30;
+pub const SYS_IOPRIO_GET: usize = 31;
+pub const SYS_FLOCK: usize = 32;
+pub const SYS_MKNODAT: usize = 33;
+pub const SYS_MKDIRAT: usize = 34;
+pub const SYS_UNLINKAT: usize = 35;
+pub const SYS_SYMLINKAT: usize = 36;
+pub const SYS_LINKAT: usize = 37;
+pub const SYS_RENAMEAT: usize = 38;
+pub const SYS_UMOUNT: usize = 39;
+pub const SYS_MOUNT: usize = 40;
+pub const SYS_PIVOT_ROOT: usize = 41;
+pub const SYS_NI_SYSCALL: usize = 42;
+pub const SYS_STATFS64: usize = 43;
+pub const SYS_FSTATFS64: usize = 44;
+pub const SYS_TRUNCATE64: usize = 45;
+pub const SYS_FTRUNCATE64: usize = 46;
+pub const SYS_FALLOCATE: usize = 47;
+pub const SYS_FACCESSAT: usize = 48;
+pub const SYS_CHDIR: usize = 49;
+pub const SYS_FCHDIR: usize = 50;
+pub const SYS_CHROOT: usize = 51;
+pub const SYS_FCHMOD: usize = 52;
+pub const SYS_FCHMODAT: usize = 53;
+pub const SYS_FCHOWNAT: usize = 54;
+pub const SYS_FCHOWN: usize = 55;
+pub const SYS_OPENAT: usize = 56;
+pub const SYS_CLOSE: usize = 57;
+pub const SYS_VHANGUP: usize = 58;
+pub const SYS_PIPE2: usize = 59;
+pub const SYS_QUOTACTL: usize = 60;
+pub const SYS_GETDENTS64: usize = 61;
+pub const SYS_LSEEK: usize = 62;
+pub const SYS_READ: usize = 63;
+pub const SYS_WRITE: usize = 64;
+pub const SYS_READV: usize = 65;
+pub const SYS_WRITEV: usize = 66;
+pub const SYS_PREAD64: usize = 67;
+pub const SYS_PWRITE64: usize = 68;
+pub const SYS_PREADV: usize = 69;
+pub const SYS_PWRITEV: usize = 70;
+pub const SYS_SENDFILE64: usize = 71;
+pub const SYS_PSELECT6: usize = 72;
+pub const SYS_PPOLL: usize = 73;
+pub const SYS_SIGNALFD4: usize = 74;
+pub const SYS_VMSPLICE: usize = 75;
+pub const SYS_SPLICE: usize = 76;
+pub const SYS_TEE: usize = 77;
+pub const SYS_READLINKAT: usize = 78;
+pub const SYS_NEWFSTATAT: usize = 79;
+pub const SYS_NEWFSTAT: usize = 80;
+pub const SYS_SYNC: usize = 81;
+pub const SYS_FSYNC: usize = 82;
+pub const SYS_FDATASYNC: usize = 83;
+pub const SYS_SYNC_FILE_RANGE2: usize = 84;
+pub const SYS_SYNC_FILE_RANGE: usize = 84;
+pub const SYS_TIMERFD_CREATE: usize = 85;
+pub const SYS_TIMERFD_SETTIME32: usize = 86;
+pub const SYS_TIMERFD_GETTIME32: usize = 87;
+pub const SYS_UTIMENSAT: usize = 88;
+pub const SYS_ACCT: usize = 89;
+pub const SYS_CAPGET: usize = 90;
+pub const SYS_CAPSET: usize = 91;
+pub const SYS_PERSONALITY: usize = 92;
+pub const SYS_EXIT: usize = 93;
+pub const SYS_EXIT_GROUP: usize = 94;
+pub const SYS_WAITID: usize = 95;
+pub const SYS_SET_TID_ADDRESS: usize = 96;
+pub const SYS_UNSHARE: usize = 97;
+pub const SYS_FUTEX_TIME32: usize = 98;
+pub const SYS_SET_ROBUST_LIST: usize = 99;
+pub const SYS_GET_ROBUST_LIST: usize = 100;
+pub const SYS_NANOSLEEP: usize = 101;
+pub const SYS_GETITIMER: usize = 102;
+pub const SYS_SETITIMER: usize = 103;
+pub const SYS_KEXEC_LOAD: usize = 104;
+pub const SYS_INIT_MODULE: usize = 105;
+pub const SYS_DELETE_MODULE: usize = 106;
+pub const SYS_TIMER_CREATE: usize = 107;
+pub const SYS_TIMER_GETTIME32: usize = 108;
+pub const SYS_TIMER_GETOVERRUN: usize = 109;
+pub const SYS_TIMER_SETTIME32: usize = 110;
+pub const SYS_TIMER_DELETE: usize = 111;
+pub const SYS_CLOCK_SETTIME32: usize = 112;
+pub const SYS_CLOCK_GETTIME: usize = 113;
+pub const SYS_CLOCK_GETRES_TIME32: usize = 114;
+pub const SYS_CLOCK_NANOSLEEP_TIME32: usize = 115;
+pub const SYS_SYSLOG: usize = 116;
+pub const SYS_PTRACE: usize = 117;
+pub const SYS_SCHED_SETPARAM: usize = 118;
+pub const SYS_SCHED_SETSCHEDULER: usize = 119;
+pub const SYS_SCHED_GETSCHEDULER: usize = 120;
+pub const SYS_SCHED_GETPARAM: usize = 121;
+pub const SYS_SCHED_SETAFFINITY: usize = 122;
+pub const SYS_SCHED_GETAFFINITY: usize = 123;
+pub const SYS_SCHED_YIELD: usize = 124;
+pub const SYS_SCHED_GET_PRIORITY_MAX: usize = 125;
+pub const SYS_SCHED_GET_PRIORITY_MIN: usize = 126;
+pub const SYS_SCHED_RR_GET_INTERVAL_TIME32: usize = 127;
+pub const SYS_RESTART_SYSCALL: usize = 128;
+pub const SYS_KILL: usize = 129;
+pub const SYS_TKILL: usize = 130;
+pub const SYS_TGKILL: usize = 131;
+pub const SYS_SIGALTSTACK: usize = 132;
+pub const SYS_RT_SIGSUSPEND: usize = 133;
+pub const SYS_RT_SIGACTION: usize = 134;
+pub const SYS_RT_SIGPROCMASK: usize = 135;
+pub const SYS_RT_SIGPENDING: usize = 136;
+pub const SYS_RT_SIGTIMEDWAIT_TIME32: usize = 137;
+pub const SYS_RT_SIGQUEUEINFO: usize = 138;
+pub const SYS_RT_SIGRETURN: usize = 139;
+pub const SYS_SETPRIORITY: usize = 140;
+pub const SYS_GETPRIORITY: usize = 141;
+pub const SYS_REBOOT: usize = 142;
+pub const SYS_SETREGID: usize = 143;
+pub const SYS_SETGID: usize = 144;
+pub const SYS_SETREUID: usize = 145;
+pub const SYS_SETUID: usize = 146;
+pub const SYS_SETRESUID: usize = 147;
+pub const SYS_GETRESUID: usize = 148;
+pub const SYS_SETRESGID: usize = 149;
+pub const SYS_GETRESGID: usize = 150;
+pub const SYS_SETFSUID: usize = 151;
+pub const SYS_SETFSGID: usize = 152;
+pub const SYS_TIMES: usize = 153;
+pub const SYS_SETPGID: usize = 154;
+pub const SYS_GETPGID: usize = 155;
+pub const SYS_GETSID: usize = 156;
+pub const SYS_SETSID: usize = 157;
+pub const SYS_GETGROUPS: usize = 158;
+pub const SYS_SETGROUPS: usize = 159;
+pub const SYS_NEWUNAME: usize = 160;
+pub const SYS_SETHOSTNAME: usize = 161;
+pub const SYS_SETDOMAINNAME: usize = 162;
+pub const SYS_GETRLIMIT: usize = 163;
+pub const SYS_SETRLIMIT: usize = 164;
+pub const SYS_GETRUSAGE: usize = 165;
+pub const SYS_UMASK: usize = 166;
+pub const SYS_PRCTL: usize = 167;
+pub const SYS_GETCPU: usize = 168;
+pub const SYS_GETTIMEOFDAY: usize = 169;
+pub const SYS_SETTIMEOFDAY: usize = 170;
+pub const SYS_ADJTIMEX_TIME32: usize = 171;
+pub const SYS_GETPID: usize = 172;
+pub const SYS_GETPPID: usize = 173;
+pub const SYS_GETUID: usize = 174;
+pub const SYS_GETEUID: usize = 175;
+pub const SYS_GETGID: usize = 176;
+pub const SYS_GETEGID: usize = 177;
+pub const SYS_GETTID: usize = 178;
+pub const SYS_SYSINFO: usize = 179;
+pub const SYS_MQ_OPEN: usize = 180;
+pub const SYS_MQ_UNLINK: usize = 181;
+pub const SYS_MQ_TIMEDSEND_TIME32: usize = 182;
+pub const SYS_MQ_TIMEDRECEIVE_TIME32: usize = 183;
+pub const SYS_MQ_NOTIFY: usize = 184;
+pub const SYS_MQ_GETSETATTR: usize = 185;
+pub const SYS_MSGGET: usize = 186;
+pub const SYS_MSGCTL: usize = 187;
+pub const SYS_MSGRCV: usize = 188;
+pub const SYS_MSGSND: usize = 189;
+pub const SYS_SEMGET: usize = 190;
+pub const SYS_SEMCTL: usize = 191;
+pub const SYS_SEMTIMEDOP_TIME32: usize = 192;
+pub const SYS_SEMOP: usize = 193;
+pub const SYS_SHMGET: usize = 194;
+pub const SYS_SHMCTL: usize = 195;
+pub const SYS_SHMAT: usize = 196;
+pub const SYS_SHMDT: usize = 197;
+pub const SYS_SOCKET: usize = 198;
+pub const SYS_SOCKETPAIR: usize = 199;
+pub const SYS_BIND: usize = 200;
+pub const SYS_LISTEN: usize = 201;
+pub const SYS_ACCEPT: usize = 202;
+pub const SYS_CONNECT: usize = 203;
+pub const SYS_GETSOCKNAME: usize = 204;
+pub const SYS_GETPEERNAME: usize = 205;
+pub const SYS_SENDTO: usize = 206;
+pub const SYS_RECVFROM: usize = 207;
+pub const SYS_SETSOCKOPT: usize = 208;
+pub const SYS_GETSOCKOPT: usize = 209;
+pub const SYS_SHUTDOWN: usize = 210;
+pub const SYS_SENDMSG: usize = 211;
+pub const SYS_RECVMSG: usize = 212;
+pub const SYS_READAHEAD: usize = 213;
+pub const SYS_BRK: usize = 214;
+pub const SYS_MUNMAP: usize = 215;
+pub const SYS_MREMAP: usize = 216;
+pub const SYS_ADD_KEY: usize = 217;
+pub const SYS_REQUEST_KEY: usize = 218;
+pub const SYS_KEYCTL: usize = 219;
+pub const SYS_CLONE: usize = 220;
+pub const SYS_EXECVE: usize = 221;
+pub const SYS_MMAP: usize = 222;
+pub const SYS_FADVISE64_64: usize = 223;
+pub const SYS_SWAPON: usize = 224;
+pub const SYS_SWAPOFF: usize = 225;
+pub const SYS_MPROTECT: usize = 226;
+pub const SYS_MSYNC: usize = 227;
+pub const SYS_MLOCK: usize = 228;
+pub const SYS_MUNLOCK: usize = 229;
+pub const SYS_MLOCKALL: usize = 230;
+pub const SYS_MUNLOCKALL: usize = 231;
+pub const SYS_MINCORE: usize = 232;
+pub const SYS_MADVISE: usize = 233;
+pub const SYS_REMAP_FILE_PAGES: usize = 234;
+pub const SYS_MBIND: usize = 235;
+pub const SYS_GET_MEMPOLICY: usize = 236;
+pub const SYS_SET_MEMPOLICY: usize = 237;
+pub const SYS_MIGRATE_PAGES: usize = 238;
+pub const SYS_MOVE_PAGES: usize = 239;
+pub const SYS_RT_TGSIGQUEUEINFO: usize = 240;
+pub const SYS_PERF_EVENT_OPEN: usize = 241;
+pub const SYS_ACCEPT4: usize = 242;
+pub const SYS_RECVMMSG_TIME32: usize = 243;
+pub const SYS_ARCH_SPECIFIC_SYSCALL: usize = 244;
+pub const SYS_WAIT4: usize = 260;
+pub const SYS_PRLIMIT64: usize = 261;
+pub const SYS_FANOTIFY_INIT: usize = 262;
+pub const SYS_FANOTIFY_MARK: usize = 263;
+pub const SYS_NAME_TO_HANDLE_AT: usize = 264;
+pub const SYS_OPEN_BY_HANDLE_AT: usize = 265;
+pub const SYS_CLOCK_ADJTIME32: usize = 266;
+pub const SYS_SYNCFS: usize = 267;
+pub const SYS_SETNS: usize = 268;
+pub const SYS_SENDMMSG: usize = 269;
+pub const SYS_PROCESS_VM_READV: usize = 270;
+pub const SYS_PROCESS_VM_WRITEV: usize = 271;
+pub const SYS_KCMP: usize = 272;
+pub const SYS_FINIT_MODULE: usize = 273;
+pub const SYS_SCHED_SETATTR: usize = 274;
+pub const SYS_SCHED_GETATTR: usize = 275;
+pub const SYS_RENAMEAT2: usize = 276;
+pub const SYS_SECCOMP: usize = 277;
+pub const SYS_GETRANDOM: usize = 278;
+pub const SYS_MEMFD_CREATE: usize = 279;
+pub const SYS_BPF: usize = 280;
+pub const SYS_EXECVEAT: usize = 281;
+pub const SYS_USERFAULTFD: usize = 282;
+pub const SYS_MEMBARRIER: usize = 283;
+pub const SYS_MLOCK2: usize = 284;
+pub const SYS_COPY_FILE_RANGE: usize = 285;
+pub const SYS_PREADV2: usize = 286;
+pub const SYS_PWRITEV2: usize = 287;
+pub const SYS_PKEY_MPROTECT: usize = 288;
+pub const SYS_PKEY_ALLOC: usize = 289;
+pub const SYS_PKEY_FREE: usize = 290;
+pub const SYS_STATX: usize = 291;
+pub const SYS_IO_PGETEVENTS_TIME32: usize = 292;
+pub const SYS_RSEQ: usize = 293;
+pub const SYS_KEXEC_FILE_LOAD: usize = 294;
+// 64-bit time syscalls
+pub const SYS_CLOCK_SETTIME: usize = 404;
+pub const SYS_CLOCK_ADJTIME: usize = 405;
+pub const SYS_CLOCK_GETRES: usize = 406;
+pub const SYS_CLOCK_NANOSLEEP: usize = 407;
+pub const SYS_TIMER_GETTIME: usize = 408;
+pub const SYS_TIMER_SETTIME: usize = 409;
+pub const SYS_TIMERFD_GETTIME: usize = 410;
+pub const SYS_TIMERFD_SETTIME: usize = 411;
+pub const SYS_UTIMENSAT_TIME64: usize = 412;
+pub const SYS_PPOLL_TIME64: usize = 414;
+pub const SYS_IO_PGETEVENTS: usize = 416;
+pub const SYS_RECVMMSG: usize = 417;
+pub const SYS_MQ_TIMEDSEND: usize = 418;
+pub const SYS_MQ_TIMEDRECEIVE: usize = 419;
+pub const SYS_SEMTIMEDOP: usize = 420;
+pub const SYS_RT_SIGTIMEDWAIT: usize = 421;
+pub const SYS_FUTEX: usize = 422;
+pub const SYS_SCHED_RR_GET_INTERVAL: usize = 423;
+pub const SYS_PIDFD_SEND_SIGNAL: usize = 424;
+pub const SYS_IO_URING_SETUP: usize = 425;
+pub const SYS_IO_URING_ENTER: usize = 426;
+pub const SYS_IO_URING_REGISTER: usize = 427;
+pub const SYS_OPEN_TREE: usize = 428;
+pub const SYS_MOVE_MOUNT: usize = 429;
+pub const SYS_FSOPEN: usize = 430;
+pub const SYS_FSCONFIG: usize = 431;
+pub const SYS_FSMOUNT: usize = 432;
+pub const SYS_FSPICK: usize = 433;
+pub const SYS_PIDFD_OPEN: usize = 434;
+pub const SYS_CLONE3: usize = 435;
+pub const SYS_CLOSE_RANGE: usize = 436;
+pub const SYS_OPENAT2: usize = 437;
+pub const SYS_PIDFD_GETFD: usize = 438;
+pub const SYS_FACCESSAT2: usize = 439;
+pub const SYS_PROCESS_MADVISE: usize = 440;
+pub const SYS_EPOLL_PWAIT2: usize = 441;
+pub const SYS_MOUNT_SETATTR: usize = 442;

+ 3 - 3
crates/posix_types/src/syscall_no/riscv64.rs

@@ -71,7 +71,7 @@ pub const SYS_PWRITE64: usize = 68;
 pub const SYS_PREADV: usize = 69;
 pub const SYS_PWRITEV: usize = 70;
 pub const SYS_SENDFILE64: usize = 71;
-pub const SYS_PSELECT6_TIME32: usize = 72;
+pub const SYS_PSELECT6: usize = 72;
 pub const SYS_PPOLL: usize = 73;
 pub const SYS_SIGNALFD4: usize = 74;
 pub const SYS_VMSPLICE: usize = 75;
@@ -98,7 +98,7 @@ pub const SYS_EXIT_GROUP: usize = 94;
 pub const SYS_WAITID: usize = 95;
 pub const SYS_SET_TID_ADDRESS: usize = 96;
 pub const SYS_UNSHARE: usize = 97;
-pub const SYS_FUTEX: usize = 422;
+pub const SYS_FUTEX: usize = 98;
 pub const SYS_SET_ROBUST_LIST: usize = 99;
 pub const SYS_GET_ROBUST_LIST: usize = 100;
 pub const SYS_NANOSLEEP: usize = 101;
@@ -114,7 +114,7 @@ pub const SYS_TIMER_DELETE: usize = 111;
 pub const SYS_CLOCK_SETTIME: usize = 404;
 pub const SYS_CLOCK_GETTIME: usize = 113;
 pub const SYS_CLOCK_GETRES: usize = 406;
-pub const SYS_CLOCK_NANOSLEEP: usize = 407;
+pub const SYS_CLOCK_NANOSLEEP: usize = 115;
 pub const SYS_SYSLOG: usize = 116;
 pub const SYS_PTRACE: usize = 117;
 pub const SYS_SCHED_SETPARAM: usize = 118;

+ 1 - 1
crates/slab_allocator/src/slab_cache.rs

@@ -89,7 +89,7 @@ where
     Allocator: PageAlloc<RawPage = Raw>,
 {
     pub(crate) const fn new_in(object_size: u32) -> Self {
-        // avoid uncessary branch in alloc and dealloc
+        // avoid unnecessary branch in alloc and dealloc
         assert!(object_size <= PAGE_SIZE as u32 / 2);
 
         Self {

+ 7 - 1
script/build-img.sh

@@ -1,7 +1,10 @@
 #!/bin/sh
 
 OS=`uname -s`
-SUDO=sudo
+
+if sudo --version > /dev/null 2>&1; then
+    SUDO=sudo
+fi
 
 if [ "$OUTPUT" = "" ]; then
     OUTPUT="build/fs-$ARCH.img"
@@ -37,6 +40,9 @@ if [ "$ARCH" = "x86_64" ]; then
 elif [ "$ARCH" = "riscv64" ]; then
     $SUDO cp ./user-programs/busybox.static build/mnt/busybox
     $SUDO cp ./user-programs/init_script_riscv64.sh build/mnt/initsh
+elif [ "$ARCH" = "loongarch64" ]; then
+    $SUDO cp ./user-programs/busybox.la64 build/mnt/busybox
+    $SUDO cp ./user-programs/init_script_loongarch64.sh build/mnt/initsh
 fi
 
 # Add your custom files here

+ 13 - 0
script/test.sh

@@ -0,0 +1,13 @@
+#!/bin/sh
+
+SUCCESS_MSG="###$RANDOM :SuCCeSS: $RANDOM###"
+
+if [ "$ARCH" = "" ]; then
+    echo "Error: ARCH environment variable is not set." >&2
+    exit 1
+fi
+
+printf "ls\necho \"$SUCCESS_MSG\"\npoweroff\n" \
+    | make test-run ARCH=$ARCH MODE=release QEMU=qemu-system-$ARCH \
+    | tee build/test-$$.log \
+    | grep "$SUCCESS_MSG" > /dev/null && echo TEST\ $$\ WITH\ ARCH=$ARCH\ PASSED

+ 1 - 1
src/driver.rs

@@ -2,7 +2,7 @@ pub mod ahci;
 pub mod e1000e;
 pub mod serial;
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 pub mod virtio;
 
 #[cfg(target_arch = "riscv64")]

+ 13 - 1
src/driver/ahci/mod.rs

@@ -168,7 +168,19 @@ impl PCIDriver for AHCIDriver {
             Err(EINVAL)?
         };
 
-        let base = PAddr::from(header.bars[PCI_REG_ABAR] as usize);
+        let bar5 = header.bars().iter().nth(PCI_REG_ABAR).ok_or(EINVAL)?.get();
+        let base = match bar5 {
+            pcie::Bar::MemoryMapped32 {
+                base: Some(base), ..
+            } => PAddr::from(base.get() as usize),
+
+            pcie::Bar::MemoryMapped64 {
+                base: Some(base), ..
+            } => PAddr::from(base.get() as usize),
+
+            _ => todo!("Unsupported BAR type"),
+        };
+
         let irqno = header.interrupt_line;
 
         // use MMIO

+ 10 - 6
src/driver/e1000e.rs

@@ -451,15 +451,19 @@ impl PCIDriver for Driver {
             Err(EINVAL)?
         };
 
-        let bar0 = header.bars[0];
+        let bar0 = header.bars().iter().next().ok_or(EINVAL)?.get();
+        let base = match bar0 {
+            pcie::Bar::MemoryMapped32 {
+                base: Some(base), ..
+            } => PAddr::from(base.get() as usize),
 
-        if bar0 & 0xf != 0 {
-            Err(EINVAL)?;
-        }
+            pcie::Bar::MemoryMapped64 {
+                base: Some(base), ..
+            } => PAddr::from(base.get() as usize),
 
-        device.enable_bus_mastering();
+            _ => todo!("Unsupported BAR type"),
+        };
 
-        let base = PAddr::from(bar0 as usize);
         let e1000e = E1000eDev::new(base, header.interrupt_line as usize)?;
 
         let dev = netdev::register_netdev(e1000e)?;

+ 17 - 1
src/driver/serial.rs

@@ -10,7 +10,6 @@ use crate::{
 use alloc::{collections::vec_deque::VecDeque, format, sync::Arc};
 use bitflags::bitflags;
 use core::pin::pin;
-use eonix_mm::address::PAddr;
 use eonix_runtime::{run::FutureRun, scheduler::Scheduler};
 use eonix_sync::{SpinIrq as _, WaitList};
 use io::SerialIO;
@@ -215,6 +214,7 @@ pub fn init() -> KResult<()> {
     #[cfg(target_arch = "riscv64")]
     {
         use eonix_hal::arch_exported::fdt::FDT;
+        use eonix_mm::address::PAddr;
 
         if let Some(uart) = FDT.find_compatible(&["ns16550a", "ns16550"]) {
             let regs = uart.reg().unwrap();
@@ -238,5 +238,21 @@ pub fn init() -> KResult<()> {
         }
     }
 
+    #[cfg(target_arch = "loongarch64")]
+    {
+        use eonix_mm::address::PAddr;
+
+        let port = unsafe {
+            // SAFETY: The base address is provided by the FDT and should be valid.
+            SerialIO::new(PAddr::from(0x1fe0_01e0))
+        };
+
+        let serial = Serial::new(0, port)?;
+        serial.register_as_char_device(
+            // 2 or 4 here, let's try 2 first!
+            2,
+        )?;
+    }
+
     Ok(())
 }

+ 16 - 8
src/driver/serial/io.rs

@@ -1,6 +1,6 @@
 use super::SerialRegister;
 use core::ptr::NonNull;
-use eonix_hal::mm::ArchPhysAccess;
+use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess};
 use eonix_mm::address::{PAddr, PhysAccess};
 
 #[cfg(target_arch = "x86_64")]
@@ -82,31 +82,39 @@ impl SerialIO {
     }
 }
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 pub struct SerialIO {
     base_addr: NonNull<u8>,
 }
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 unsafe impl Send for SerialIO {}
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 unsafe impl Sync for SerialIO {}
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 impl SerialRegister for NonNull<u8> {
     fn read(&self) -> u8 {
         // SAFETY: `self` is a valid pointer to the serial port register.
-        unsafe { self.as_ptr().read_volatile() }
+        let retval = unsafe { self.as_ptr().read_volatile() };
+
+        #[cfg(target_arch = "loongarch64")]
+        memory_barrier();
+
+        retval
     }
 
     fn write(&self, data: u8) {
         // SAFETY: `self` is a valid pointer to the serial port register.
-        unsafe { self.as_ptr().write_volatile(data) }
+        unsafe { self.as_ptr().write_volatile(data) };
+
+        #[cfg(target_arch = "loongarch64")]
+        memory_barrier();
     }
 }
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 impl SerialIO {
     /// Creates a new `SerialIO` instance with the given physical address.
     ///

+ 10 - 150
src/driver/virtio.rs

@@ -1,158 +1,18 @@
 mod virtio_blk;
 
-#[cfg(not(target_arch = "riscv64"))]
-compile_error!("VirtIO drivers are only supported on RISC-V architecture");
+#[cfg(not(any(target_arch = "riscv64", target_arch = "loongarch64")))]
+compile_error!("VirtIO drivers are only supported on RISC-V and LoongArch64 architecture");
 
-use crate::kernel::{
-    block::{make_device, BlockDevice},
-    mem::{AsMemoryBlock, MemoryBlock, Page},
-};
-use alloc::{sync::Arc, vec::Vec};
-use core::num::NonZero;
-use eonix_hal::{arch_exported::fdt::FDT, mm::ArchPhysAccess};
-use eonix_log::{println_info, println_warn};
-use eonix_mm::{
-    address::{Addr, PAddr, PhysAccess},
-    paging::PFN,
-};
-use eonix_runtime::task::Task;
-use eonix_sync::Spin;
-use virtio_drivers::{
-    device::blk::VirtIOBlk,
-    transport::{mmio::MmioTransport, Transport},
-    Hal,
-};
+#[cfg(target_arch = "riscv64")]
+mod riscv64;
 
-pub struct HAL;
-
-unsafe impl Hal for HAL {
-    fn dma_alloc(
-        pages: usize,
-        _direction: virtio_drivers::BufferDirection,
-    ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull<u8>) {
-        let page = Page::alloc_at_least(pages);
-
-        let paddr = page.start().addr();
-        let ptr = page.as_memblk().as_byte_ptr();
-        page.into_raw();
-
-        (paddr, ptr)
-    }
-
-    unsafe fn dma_dealloc(
-        paddr: virtio_drivers::PhysAddr,
-        _vaddr: core::ptr::NonNull<u8>,
-        _pages: usize,
-    ) -> i32 {
-        let pfn = PFN::from(PAddr::from(paddr));
-
-        unsafe {
-            // SAFETY: The caller ensures that the pfn corresponds to a valid
-            //         page allocated by `dma_alloc`.
-            Page::from_raw(pfn);
-        }
-
-        0
-    }
-
-    unsafe fn mmio_phys_to_virt(
-        paddr: virtio_drivers::PhysAddr,
-        size: usize,
-    ) -> core::ptr::NonNull<u8> {
-        MemoryBlock::new(NonZero::new(paddr).expect("paddr must be non-zero"), size).as_byte_ptr()
-    }
-
-    unsafe fn share(
-        buffer: core::ptr::NonNull<[u8]>,
-        _direction: virtio_drivers::BufferDirection,
-    ) -> virtio_drivers::PhysAddr {
-        let paddr = unsafe {
-            // SAFETY: The caller ensures that the buffer is valid.
-            ArchPhysAccess::from_ptr(buffer.cast::<u8>())
-        };
-
-        paddr.addr()
-    }
-
-    unsafe fn unshare(
-        _paddr: virtio_drivers::PhysAddr,
-        _buffer: core::ptr::NonNull<[u8]>,
-        _direction: virtio_drivers::BufferDirection,
-    ) {
-    }
-}
+#[cfg(target_arch = "loongarch64")]
+mod loongarch64;
 
 pub fn init_virtio_devices() {
-    let mut disk_id = 0;
-    let mut virtio_devices: Vec<_> = FDT
-        .all_nodes()
-        .filter(|node| {
-            node.compatible()
-                .is_some_and(|compatible| compatible.all().any(|s| s == "virtio,mmio"))
-        })
-        .filter_map(|node| node.reg())
-        .flatten()
-        .collect();
-    virtio_devices.sort_by_key(|reg| reg.starting_address);
-
-    for reg in virtio_devices {
-        let base = PAddr::from(reg.starting_address as usize);
-        let size = reg.size.expect("Virtio device must have a size");
-
-        let base = unsafe {
-            // SAFETY: We get the base address from the FDT, which is guaranteed to be valid.
-            ArchPhysAccess::as_ptr(base)
-        };
-
-        match unsafe { MmioTransport::new(base, size) } {
-            Ok(transport) => match transport.device_type() {
-                virtio_drivers::transport::DeviceType::Block => {
-                    let block_device = VirtIOBlk::<HAL, _>::new(transport)
-                        .expect("Failed to initialize VirtIO Block device");
-
-                    let block_device = BlockDevice::register_disk(
-                        make_device(8, 16 * disk_id),
-                        2147483647,
-                        Arc::new(Spin::new(block_device)),
-                    )
-                    .expect("Failed to register VirtIO Block device");
-
-                    Task::block_on(block_device.partprobe())
-                        .expect("Failed to probe partitions for VirtIO Block device");
+    #[cfg(target_arch = "riscv64")]
+    riscv64::init();
 
-                    disk_id += 1;
-                }
-                virtio_drivers::transport::DeviceType::Network => {
-                    println_info!(
-                        "Initializing Virtio Network device at {:?} with size {:#x}",
-                        base,
-                        size
-                    );
-                }
-                virtio_drivers::transport::DeviceType::Console => {
-                    println_info!(
-                        "Initializing Virtio Console at {:?} with size {:#x}",
-                        base,
-                        size
-                    );
-                }
-                virtio_drivers::transport::DeviceType::EntropySource => {
-                    println_info!(
-                        "Initializing Virtio Entropy Source at {:?} with size {:#x}",
-                        base,
-                        size
-                    );
-                }
-                _ => {}
-            },
-            Err(err) => {
-                println_warn!(
-                    "Failed to initialize Virtio device at {:?} with size {:#x}: {}",
-                    base,
-                    size,
-                    err
-                );
-            }
-        }
-    }
+    #[cfg(target_arch = "loongarch64")]
+    loongarch64::init();
 }

+ 154 - 0
src/driver/virtio/loongarch64.rs

@@ -0,0 +1,154 @@
+use super::virtio_blk::HAL;
+use crate::kernel::{
+    block::{make_device, BlockDevice},
+    constants::EIO,
+    pcie::{self, PCIDevice, PCIDriver, PciError, SegmentGroup},
+};
+use alloc::sync::Arc;
+use core::sync::atomic::{AtomicUsize, Ordering};
+use eonix_hal::{fence::memory_barrier, mm::ArchPhysAccess};
+use eonix_log::println_warn;
+use eonix_mm::address::PhysAccess;
+use eonix_runtime::task::Task;
+use eonix_sync::Spin;
+use virtio_drivers::{
+    device::blk::VirtIOBlk,
+    transport::{
+        pci::{
+            bus::{ConfigurationAccess, DeviceFunction, PciRoot},
+            PciTransport,
+        },
+        DeviceType, Transport,
+    },
+};
+
+impl ConfigurationAccess for &SegmentGroup {
+    fn read_word(&self, device_function: DeviceFunction, register_offset: u8) -> u32 {
+        let conf_space = self
+            .get_conf_space(
+                device_function.bus,
+                device_function.device,
+                device_function.function,
+            )
+            .expect("The given device function is out of range");
+
+        let pointer = unsafe {
+            // SAFETY: The base address is guaranteed to be valid by the PCI spec.
+            ArchPhysAccess::as_ptr(conf_space.base + register_offset as usize)
+        };
+
+        memory_barrier();
+
+        let value = unsafe {
+            // SAFETY: The pointer is guaranteed to be valid and aligned for reading a u32 from.
+            pointer.read_volatile()
+        };
+
+        memory_barrier();
+
+        value
+    }
+
+    fn write_word(&mut self, device_function: DeviceFunction, register_offset: u8, data: u32) {
+        let conf_space = self
+            .get_conf_space(
+                device_function.bus,
+                device_function.device,
+                device_function.function,
+            )
+            .expect("The given device function is out of range");
+
+        let pointer = unsafe {
+            // SAFETY: The base address is guaranteed to be valid by the PCI spec.
+            ArchPhysAccess::as_ptr(conf_space.base + register_offset as usize)
+        };
+
+        memory_barrier();
+
+        unsafe {
+            // SAFETY: The pointer is guaranteed to be valid and aligned for writing a u32 to.
+            pointer.write_volatile(data)
+        };
+
+        memory_barrier();
+    }
+
+    unsafe fn unsafe_clone(&self) -> Self {
+        self
+    }
+}
+
+struct VirtIODriver {
+    disk_id: AtomicUsize,
+}
+
+impl PCIDriver for VirtIODriver {
+    fn vendor_id(&self) -> u16 {
+        0x1af4
+    }
+
+    fn device_id(&self) -> u16 {
+        0x1001
+    }
+
+    fn handle_device(&self, device: Arc<PCIDevice<'static>>) -> Result<(), PciError> {
+        let transport = PciTransport::new::<HAL, _>(
+            &mut PciRoot::new(device.segment_group()),
+            DeviceFunction {
+                bus: device.config_space().bus,
+                device: device.config_space().device,
+                function: device.config_space().function,
+            },
+        )
+        .map_err(|err| {
+            println_warn!(
+                "Failed to create VirtIO transport for device {}:{}:{}: {}",
+                device.config_space().bus,
+                device.config_space().device,
+                device.config_space().function,
+                err
+            );
+            EIO
+        })?;
+
+        if transport.device_type() != DeviceType::Block {
+            println_warn!(
+                "Detected non-block VirtIO device ({:?}) in virtio block driver: {}:{}:{}",
+                transport.device_type(),
+                device.config_space().bus,
+                device.config_space().device,
+                device.config_space().function,
+            );
+
+            Err(EIO)?;
+        }
+
+        let virtio_block = VirtIOBlk::<HAL, _>::new(transport).map_err(|err| {
+            println_warn!("Failed to initialize VirtIO Block device: {}", err);
+            EIO
+        })?;
+
+        let block_device = BlockDevice::register_disk(
+            make_device(8, 16 * self.disk_id.fetch_add(1, Ordering::AcqRel) as u32),
+            2147483647, // TODO: Get size from device
+            Arc::new(Spin::new(virtio_block)),
+        )?;
+
+        Task::block_on(block_device.partprobe()).map_err(|err| {
+            println_warn!(
+                "Failed to probe partitions for VirtIO Block device: {}",
+                err
+            );
+            EIO
+        })?;
+
+        Ok(())
+    }
+}
+
+pub fn init() {
+    pcie::register_driver(VirtIODriver {
+        disk_id: AtomicUsize::new(0),
+    })
+    .expect("Failed to register VirtIO driver");
+}

+ 96 - 0
src/driver/virtio/riscv64.rs

@@ -0,0 +1,96 @@
+use super::virtio_blk::HAL;
+use crate::kernel::{
+    block::{make_device, BlockDevice},
+    mem::{AsMemoryBlock, MemoryBlock, Page},
+};
+use alloc::{sync::Arc, vec::Vec};
+use core::num::NonZero;
+use eonix_hal::arch_exported::fdt::FDT;
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_log::{println_info, println_warn};
+use eonix_mm::{
+    address::{Addr, PAddr, PhysAccess},
+    paging::PFN,
+};
+use eonix_runtime::task::Task;
+use eonix_sync::Spin;
+use virtio_drivers::{
+    device::blk::VirtIOBlk,
+    transport::{mmio::MmioTransport, Transport},
+    Hal,
+};
+
+pub fn init() {
+    let mut disk_id = 0;
+    let mut virtio_devices: Vec<_> = FDT
+        .all_nodes()
+        .filter(|node| {
+            node.compatible()
+                .is_some_and(|compatible| compatible.all().any(|s| s == "virtio,mmio"))
+        })
+        .filter_map(|node| node.reg())
+        .flatten()
+        .collect();
+    virtio_devices.sort_by_key(|reg| reg.starting_address);
+
+    for reg in virtio_devices {
+        let base = PAddr::from(reg.starting_address as usize);
+        let size = reg.size.expect("Virtio device must have a size");
+
+        let base = unsafe {
+            // SAFETY: We get the base address from the FDT, which is guaranteed to be valid.
+            ArchPhysAccess::as_ptr(base)
+        };
+
+        match unsafe { MmioTransport::new(base, size) } {
+            Ok(transport) => match transport.device_type() {
+                virtio_drivers::transport::DeviceType::Block => {
+                    let block_device = VirtIOBlk::<HAL, _>::new(transport)
+                        .expect("Failed to initialize VirtIO Block device");
+
+                    let block_device = BlockDevice::register_disk(
+                        make_device(8, 16 * disk_id),
+                        2147483647,
+                        Arc::new(Spin::new(block_device)),
+                    )
+                    .expect("Failed to register VirtIO Block device");
+
+                    Task::block_on(block_device.partprobe())
+                        .expect("Failed to probe partitions for VirtIO Block device");
+
+                    disk_id += 1;
+                }
+                virtio_drivers::transport::DeviceType::Network => {
+                    println_info!(
+                        "Initializing Virtio Network device at {:?} with size {:#x}",
+                        base,
+                        size
+                    );
+                }
+                virtio_drivers::transport::DeviceType::Console => {
+                    println_info!(
+                        "Initializing Virtio Console at {:?} with size {:#x}",
+                        base,
+                        size
+                    );
+                }
+                virtio_drivers::transport::DeviceType::EntropySource => {
+                    println_info!(
+                        "Initializing Virtio Entropy Source at {:?} with size {:#x}",
+                        base,
+                        size
+                    );
+                }
+                _ => {}
+            },
+            Err(err) => {
+                println_warn!(
+                    "Failed to initialize Virtio device at {:?} with size {:#x}: {}",
+                    base,
+                    size,
+                    err
+                );
+            }
+        }
+    }
+}

+ 70 - 4
src/driver/virtio/virtio_blk.rs

@@ -1,17 +1,83 @@
-use super::HAL;
 use crate::{
     io::Chunks,
     kernel::{
         block::{BlockDeviceRequest, BlockRequestQueue},
         constants::EIO,
-        mem::AsMemoryBlock,
+        mem::{AsMemoryBlock, Page},
     },
     prelude::KResult,
 };
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_mm::{
+    address::{Addr, PAddr, PhysAccess},
+    paging::PFN,
+};
 use eonix_sync::Spin;
-use virtio_drivers::{device::blk::VirtIOBlk, transport::mmio::MmioTransport};
+use virtio_drivers::{device::blk::VirtIOBlk, transport::Transport, Hal};
+
+pub struct HAL;
+
+unsafe impl Hal for HAL {
+    fn dma_alloc(
+        pages: usize,
+        _direction: virtio_drivers::BufferDirection,
+    ) -> (virtio_drivers::PhysAddr, core::ptr::NonNull<u8>) {
+        let page = Page::alloc_at_least(pages);
+
+        let paddr = page.start().addr();
+        let ptr = page.as_memblk().as_byte_ptr();
+        page.into_raw();
+
+        (paddr, ptr)
+    }
+
+    unsafe fn dma_dealloc(
+        paddr: virtio_drivers::PhysAddr,
+        _vaddr: core::ptr::NonNull<u8>,
+        _pages: usize,
+    ) -> i32 {
+        let pfn = PFN::from(PAddr::from(paddr));
+
+        unsafe {
+            // SAFETY: The caller ensures that the pfn corresponds to a valid
+            //         page allocated by `dma_alloc`.
+            Page::from_raw(pfn);
+        }
+
+        0
+    }
+
+    unsafe fn mmio_phys_to_virt(
+        paddr: virtio_drivers::PhysAddr,
+        _size: usize,
+    ) -> core::ptr::NonNull<u8> {
+        unsafe { ArchPhysAccess::as_ptr(PAddr::from(paddr)) }
+    }
+
+    unsafe fn share(
+        buffer: core::ptr::NonNull<[u8]>,
+        _direction: virtio_drivers::BufferDirection,
+    ) -> virtio_drivers::PhysAddr {
+        let paddr = unsafe {
+            // SAFETY: The caller ensures that the buffer is valid.
+            ArchPhysAccess::from_ptr(buffer.cast::<u8>())
+        };
+
+        paddr.addr()
+    }
+
+    unsafe fn unshare(
+        _paddr: virtio_drivers::PhysAddr,
+        _buffer: core::ptr::NonNull<[u8]>,
+        _direction: virtio_drivers::BufferDirection,
+    ) {
+    }
+}
 
-impl BlockRequestQueue for Spin<VirtIOBlk<HAL, MmioTransport<'_>>> {
+impl<T> BlockRequestQueue for Spin<VirtIOBlk<HAL, T>>
+where
+    T: Transport + Send,
+{
     fn max_request_pages(&self) -> u64 {
         1024
     }

+ 41 - 5
src/fs/ext4.rs

@@ -1,5 +1,6 @@
 use core::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 
+use crate::kernel::mem::{PageCache, PageCacheBackend};
 use crate::{
     io::{Buffer, ByteBuffer, Stream},
     kernel::{
@@ -21,9 +22,10 @@ use crate::{
     path::Path,
     prelude::*,
 };
+use alloc::sync::Weak;
 use alloc::{
     collections::btree_map::{BTreeMap, Entry},
-    sync::{Arc, Weak},
+    sync::Arc,
 };
 use another_ext4::{
     Block, BlockDevice as Ext4BlockDeviceTrait, Ext4, FileType, InodeMode, PBlockId,
@@ -138,7 +140,7 @@ impl Ext4Fs {
                 let mode = *idata.mode.get_mut();
                 if s_isreg(mode) {
                     vacant
-                        .insert(Ext4Inode::File(Arc::new(FileInode { idata })))
+                        .insert(Ext4Inode::File(FileInode::with_idata(idata)))
                         .clone()
                         .into_inner()
                 } else if s_isdir(mode) {
@@ -149,7 +151,7 @@ impl Ext4Fs {
                 } else {
                     println_warn!("ext4: Unsupported inode type: {mode:#o}");
                     vacant
-                        .insert(Ext4Inode::File(Arc::new(FileInode { idata })))
+                        .insert(Ext4Inode::File(FileInode::with_idata(idata)))
                         .clone()
                         .into_inner()
                 }
@@ -220,7 +222,9 @@ impl Ext4Inode {
 }
 
 define_struct_inode! {
-    struct FileInode;
+    struct FileInode {
+        page_cache: PageCache,
+    }
 }
 
 define_struct_inode! {
@@ -228,8 +232,17 @@ define_struct_inode! {
 }
 
 impl FileInode {
+    fn with_idata(idata: InodeData) -> Arc<Self> {
+        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
+            idata,
+            page_cache: PageCache::new(weak_self.clone()),
+        });
+
+        inode
+    }
+
     pub fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
-        Arc::new_cyclic(|_| FileInode {
+        Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
             idata: {
                 let inode_data = InodeData::new(ino, vfs);
                 inode_data
@@ -238,12 +251,35 @@ impl FileInode {
                 inode_data.nlink.store(1, Ordering::Relaxed);
                 inode_data
             },
+            page_cache: PageCache::new(weak_self.clone()),
         })
     }
 }
 
+impl PageCacheBackend for FileInode {
+    fn read_page(&self, page: &mut crate::kernel::mem::CachePage, offset: usize) -> KResult<usize> {
+        self.read_direct(page, offset)
+    }
+
+    fn write_page(&self, page: &crate::kernel::mem::CachePage, offset: usize) -> KResult<usize> {
+        todo!()
+    }
+
+    fn size(&self) -> usize {
+        self.size.load(Ordering::Relaxed) as usize
+    }
+}
+
 impl Inode for FileInode {
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.page_cache)
+    }
+
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Task::block_on(self.page_cache.read(buffer, offset))
+    }
+
+    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let ext4fs = vfs.as_any().downcast_ref::<Ext4Fs>().unwrap();
 

+ 66 - 6
src/fs/fat32.rs

@@ -1,13 +1,19 @@
 mod dir;
 mod file;
 
+use crate::io::Stream;
 use crate::kernel::constants::EIO;
+use crate::kernel::mem::AsMemoryBlock;
+use crate::kernel::vfs::inode::WriteOffset;
 use crate::{
     io::{Buffer, ByteBuffer, UninitBuffer},
     kernel::{
         block::{make_device, BlockDevice, BlockDeviceRequest},
         constants::{S_IFDIR, S_IFREG},
-        mem::paging::Page,
+        mem::{
+            paging::Page,
+            {CachePage, PageCache, PageCacheBackend},
+        },
         vfs::{
             dentry::Dentry,
             inode::{define_struct_inode, Ino, Inode, InodeData},
@@ -32,6 +38,8 @@ use file::ClusterRead;
 
 type ClusterNo = u32;
 
+const SECTOR_SIZE: usize = 512;
+
 #[derive(Clone, Copy)]
 #[repr(C, packed)]
 struct Bootsector {
@@ -231,13 +239,16 @@ impl FatInode {
 }
 
 define_struct_inode! {
-    struct FileInode;
+    struct FileInode {
+        page_cache: PageCache,
+    }
 }
 
 impl FileInode {
     fn new(ino: Ino, weak: Weak<FatFs>, size: u32) -> Arc<Self> {
-        let inode = Arc::new(Self {
+        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| Self {
             idata: InodeData::new(ino, weak),
+            page_cache: PageCache::new(weak_self.clone()),
         });
 
         // Safety: We are initializing the inode
@@ -250,7 +261,15 @@ impl FileInode {
 }
 
 impl Inode for FileInode {
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.page_cache)
+    }
+
     fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Task::block_on(self.page_cache.read(buffer, offset))
+    }
+
+    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
         let vfs = self.vfs.upgrade().ok_or(EIO)?;
         let vfs = vfs.as_any().downcast_ref::<FatFs>().unwrap();
         let fat = Task::block_on(vfs.fat.read());
@@ -259,16 +278,57 @@ impl Inode for FileInode {
             return Ok(0);
         }
 
-        let iter = ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).read(vfs, offset);
+        let cluster_size = vfs.sectors_per_cluster as usize * SECTOR_SIZE;
+        assert!(cluster_size <= 0x1000, "Cluster size is too large");
+
+        let skip_clusters = offset / cluster_size;
+        let inner_offset = offset % cluster_size;
+
+        let cluster_iter =
+            ClusterIterator::new(fat.as_ref(), self.ino as ClusterNo).skip(skip_clusters);
+
+        let buffer_page = Page::alloc();
+        for cluster in cluster_iter {
+            vfs.read_cluster(cluster, &buffer_page)?;
+
+            let data = unsafe {
+                // SAFETY: We are the only one holding this page.
+                &buffer_page.as_memblk().as_bytes()[inner_offset..]
+            };
 
-        for data in iter {
-            if buffer.fill(data?)?.should_stop() {
+            let end = offset + data.len();
+            let real_end = core::cmp::min(end, self.size.load(Ordering::Relaxed) as usize);
+            let real_size = real_end - offset;
+
+            if buffer.fill(&data[..real_size])?.should_stop() {
                 break;
             }
         }
 
         Ok(buffer.wrote())
     }
+
+    fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        todo!()
+    }
+
+    fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        todo!()
+    }
+}
+
+impl PageCacheBackend for FileInode {
+    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize> {
+        self.read_direct(page, offset)
+    }
+
+    fn write_page(&self, page: &CachePage, offset: usize) -> KResult<usize> {
+        todo!()
+    }
+
+    fn size(&self) -> usize {
+        self.size.load(Ordering::Relaxed) as usize
+    }
 }
 
 define_struct_inode! {

+ 1 - 0
src/fs/mod.rs

@@ -1,4 +1,5 @@
 pub mod fat32;
 pub mod procfs;
+pub mod shm;
 pub mod tmpfs;
 pub mod ext4;

+ 146 - 0
src/fs/shm.rs

@@ -0,0 +1,146 @@
+use core::sync::atomic::{AtomicU32, Ordering};
+
+use alloc::{collections::btree_map::BTreeMap, sync::Arc};
+use bitflags::bitflags;
+use eonix_sync::{LazyLock, Mutex};
+
+use crate::{
+    fs::tmpfs::{DirectoryInode, FileInode, TmpFs},
+    kernel::{constants::ENOSPC, vfs::inode::Mode},
+    prelude::KResult,
+};
+
+bitflags! {
+    #[derive(Debug, Clone, Copy)]
+    pub struct ShmFlags: u32 {
+        /// Create a new segment. If this flag is not used, then shmget() will
+        /// find the segment associated with key and check to see if the user
+        /// has permission to access the segment.
+        const IPC_CREAT = 0o1000;
+        /// This flag is used with IPC_CREAT to ensure that this call creates
+        /// the segment.  If the segment already exists, the call fails.
+        const IPC_EXCL = 0o2000;
+
+        /// Attach the segment for read-only access.If this flag is not specified,
+        /// the segment is attached for read and write access, and the process
+        /// must have read and write permission for the segment.
+        const SHM_RDONLY = 0o10000;
+        /// round attach address to SHMLBA boundary
+        const SHM_RND = 0o20000;
+        /// Allow the contents of the segment to be executed.
+        const SHM_EXEC = 0o100000;
+    }
+}
+
+pub const IPC_PRIVATE: usize = 0;
+
+pub struct ShmManager {
+    tmpfs: Arc<TmpFs>,
+    root: Arc<DirectoryInode>,
+    areas: BTreeMap<u32, ShmArea>,
+}
+
+#[repr(C)]
+#[derive(Default, Clone, Copy, Debug)]
+pub struct IpcPerm {
+    key: i32,
+    uid: u32,
+    gid: u32,
+    cuid: u32,
+    cgid: u32,
+    mode: u16,
+    seq: u16,
+}
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct ShmIdDs {
+    // Ownership and permissions
+    pub shm_perm: IpcPerm,
+    // Size of segment (bytes). In our system, this must be aligned
+    pub shm_segsz: usize,
+    // Last attach time
+    pub shm_atime: usize,
+    // Last detach time
+    pub shm_dtime: usize,
+    // Creation time/time of last modification via shmctl()
+    pub shm_ctime: usize,
+    // PID of creator
+    pub shm_cpid: usize,
+    // PID of last shmat(2)/shmdt(2)
+    pub shm_lpid: usize,
+    // No. of current attaches
+    pub shm_nattch: usize,
+}
+
+impl ShmIdDs {
+    fn new(size: usize, pid: u32) -> Self {
+        Self {
+            shm_perm: IpcPerm::default(),
+            shm_segsz: size,
+            shm_atime: 0,
+            shm_dtime: 0,
+            shm_ctime: 0, // Should set instant now
+            shm_cpid: pid as usize,
+            shm_lpid: 0,
+            shm_nattch: 0,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct ShmArea {
+    pub area: Arc<FileInode>,
+    pub shmid_ds: ShmIdDs,
+}
+
+// A big lock here to protect the shared memory area.
+// Can be improved with finer-grained locking?
+pub static SHM_MANAGER: LazyLock<Mutex<ShmManager>> =
+    LazyLock::new(|| Mutex::new(ShmManager::new()));
+
+impl ShmManager {
+    fn new() -> Self {
+        let (tmpfs, root) = TmpFs::create(false).expect("should create shm_area successfully");
+        Self {
+            tmpfs,
+            root,
+            areas: BTreeMap::new(),
+        }
+    }
+
+    pub fn create_shared_area(&self, size: usize, pid: u32, mode: Mode) -> ShmArea {
+        let ino = self.tmpfs.assign_ino();
+        let vfs = Arc::downgrade(&self.tmpfs);
+        ShmArea {
+            area: FileInode::new(ino, vfs, size, mode),
+            shmid_ds: ShmIdDs::new(size, pid),
+        }
+    }
+
+    pub fn get(&self, shmid: u32) -> Option<&ShmArea> {
+        self.areas.get(&shmid)
+    }
+
+    pub fn insert(&mut self, shmid: u32, area: ShmArea) {
+        self.areas.insert(shmid, area);
+    }
+}
+
+pub fn gen_shm_id(key: usize) -> KResult<u32> {
+    const SHM_MAGIC: u32 = 114514000;
+
+    static NEXT_SHMID: AtomicU32 = AtomicU32::new(0);
+
+    if key == IPC_PRIVATE {
+        let shmid = NEXT_SHMID.fetch_add(1, Ordering::Relaxed);
+
+        if shmid >= SHM_MAGIC {
+            return Err(ENOSPC);
+        } else {
+            return Ok(shmid);
+        }
+    }
+
+    (key as u32).checked_add(SHM_MAGIC).ok_or(ENOSPC)
+}

+ 54 - 46
src/fs/tmpfs.rs

@@ -1,6 +1,8 @@
 use crate::io::Stream;
 use crate::kernel::constants::{EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSYS, ENOTDIR};
+use crate::kernel::mem::{CachePage, PageCache, PageCacheBackend};
 use crate::kernel::timer::Instant;
+use crate::kernel::vfs::inode::InodeData;
 use crate::kernel::vfs::inode::RenameData;
 use crate::{
     io::Buffer,
@@ -16,7 +18,9 @@ use crate::{
     prelude::*,
 };
 use alloc::sync::{Arc, Weak};
+use core::fmt::Debug;
 use core::{ops::ControlFlow, sync::atomic::Ordering};
+use eonix_mm::paging::PAGE_SIZE;
 use eonix_runtime::task::Task;
 use eonix_sync::{AsProof as _, AsProofMut as _, Locked, Mutex, ProofMut};
 use itertools::Itertools;
@@ -58,7 +62,7 @@ impl Inode for NodeInode {
 }
 
 define_struct_inode! {
-    struct DirectoryInode {
+    pub(super) struct DirectoryInode {
         entries: Locked<Vec<(Arc<[u8]>, Ino)>, ()>,
     }
 }
@@ -152,7 +156,7 @@ impl Inode for DirectoryInode {
         let rwsem = Task::block_on(self.rwsem.write());
 
         let ino = vfs.assign_ino();
-        let file = FileInode::new(ino, self.vfs.clone(), mode);
+        let file = FileInode::new(ino, self.vfs.clone(), 0, mode);
 
         self.link(at.get_name(), file.as_ref(), rwsem.prove_mut());
         at.save_reg(file)
@@ -460,40 +464,60 @@ impl Inode for SymlinkInode {
 }
 
 define_struct_inode! {
-    struct FileInode {
-        filedata: Locked<Vec<u8>, ()>,
+    pub struct FileInode {
+        pages: PageCache,
+    }
+}
+
+impl Debug for FileInode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "FileInode({:?})", self.idata)
     }
 }
 
 impl FileInode {
-    fn new(ino: Ino, vfs: Weak<dyn Vfs>, mode: Mode) -> Arc<Self> {
-        Self::new_locked(ino, vfs, |inode, rwsem| unsafe {
-            addr_of_mut_field!(inode, filedata).write(Locked::new(vec![], rwsem));
+    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>, size: usize, mode: Mode) -> Arc<Self> {
+        let inode = Arc::new_cyclic(|weak_self: &Weak<FileInode>| FileInode {
+            idata: InodeData::new(ino, vfs),
+            pages: PageCache::new(weak_self.clone()),
+        });
 
-            addr_of_mut_field!(&mut *inode, mode).write((S_IFREG | (mode & 0o777)).into());
-            addr_of_mut_field!(&mut *inode, nlink).write(1.into());
-            addr_of_mut_field!(&mut *inode, ctime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, mtime).write(Spin::new(Instant::now()));
-            addr_of_mut_field!(&mut *inode, atime).write(Spin::new(Instant::now()));
-        })
+        inode
+            .mode
+            .store(S_IFREG | (mode & 0o777), Ordering::Relaxed);
+        inode.nlink.store(1, Ordering::Relaxed);
+        inode.size.store(size as u64, Ordering::Relaxed);
+        inode
+    }
+}
+
+impl PageCacheBackend for FileInode {
+    fn read_page(&self, _cache_page: &mut CachePage, _offset: usize) -> KResult<usize> {
+        Ok(PAGE_SIZE)
+    }
+
+    fn write_page(&self, _page: &CachePage, _offset: usize) -> KResult<usize> {
+        Ok(PAGE_SIZE)
+    }
+
+    fn size(&self) -> usize {
+        self.size.load(Ordering::Relaxed) as usize
     }
 }
 
 impl Inode for FileInode {
-    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
-        // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let lock = Task::block_on(self.rwsem.read());
+    fn page_cache(&self) -> Option<&PageCache> {
+        Some(&self.pages)
+    }
 
-        match self.filedata.access(lock.prove()).split_at_checked(offset) {
-            Some((_, data)) => buffer.fill(data).map(|result| result.allow_partial()),
-            None => Ok(0),
-        }
+    fn read(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        let lock = Task::block_on(self.rwsem.write());
+        Task::block_on(self.pages.read(buffer, offset))
     }
 
     fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
         let lock = Task::block_on(self.rwsem.write());
-        let filedata = self.filedata.access_mut(lock.prove_mut());
 
         let mut store_new_end = None;
         let offset = match offset {
@@ -506,41 +530,25 @@ impl Inode for FileInode {
             }
         };
 
-        let mut pos = offset;
-        loop {
-            if pos >= filedata.len() {
-                filedata.resize(pos + 4096, 0);
-            }
+        let wrote = Task::block_on(self.pages.write(stream, offset))?;
+        let cursor_end = offset + wrote;
 
-            match stream.poll_data(&mut filedata[pos..])? {
-                Some(data) => pos += data.len(),
-                None => break,
-            }
-        }
-
-        filedata.resize(pos, 0);
         if let Some(store_end) = store_new_end {
-            *store_end = pos;
+            *store_end = cursor_end;
         }
 
         // SAFETY: `lock` has done the synchronization
-        self.size.store(pos as u64, Ordering::Relaxed);
         *self.mtime.lock() = Instant::now();
+        self.size.store(cursor_end as u64, Ordering::Relaxed);
 
-        Ok(pos - offset)
+        Ok(wrote)
     }
 
     fn truncate(&self, length: usize) -> KResult<()> {
-        // TODO: We don't need that strong guarantee, find some way to avoid locks
         let lock = Task::block_on(self.rwsem.write());
-        let filedata = self.filedata.access_mut(lock.prove_mut());
-
-        // SAFETY: `lock` has done the synchronization
+        Task::block_on(self.pages.resize(length))?;
         self.size.store(length as u64, Ordering::Relaxed);
         *self.mtime.lock() = Instant::now();
-
-        filedata.resize(length, 0);
-
         Ok(())
     }
 
@@ -559,7 +567,7 @@ impl Inode for FileInode {
 }
 
 impl_any!(TmpFs);
-struct TmpFs {
+pub(super) struct TmpFs {
     next_ino: AtomicIno,
     readonly: bool,
     rename_lock: Mutex<()>,
@@ -580,11 +588,11 @@ impl Vfs for TmpFs {
 }
 
 impl TmpFs {
-    fn assign_ino(&self) -> Ino {
+    pub(super) fn assign_ino(&self) -> Ino {
         self.next_ino.fetch_add(1, Ordering::AcqRel)
     }
 
-    pub fn create(readonly: bool) -> KResult<(Arc<dyn Vfs>, Arc<dyn Inode>)> {
+    pub fn create(readonly: bool) -> KResult<(Arc<TmpFs>, Arc<DirectoryInode>)> {
         let tmpfs = Arc::new(Self {
             next_ino: AtomicIno::new(1),
             readonly,

+ 1 - 0
src/io.rs

@@ -3,6 +3,7 @@ use crate::prelude::*;
 use core::{cmp, mem::MaybeUninit};
 
 #[must_use]
+#[derive(Debug)]
 pub enum FillResult {
     Done(usize),
     Partial(usize),

+ 2 - 0
src/kernel/constants.rs

@@ -15,6 +15,7 @@ pub const SIG_SETMASK: u32 = 2;
 
 pub const CLOCK_REALTIME: u32 = 0;
 pub const CLOCK_MONOTONIC: u32 = 1;
+pub const CLOCK_REALTIME_COARSE: u32 = 5;
 
 pub const EPERM: u32 = 1;
 pub const ENOENT: u32 = 2;
@@ -35,6 +36,7 @@ pub const ENOTDIR: u32 = 20;
 pub const EISDIR: u32 = 21;
 pub const EINVAL: u32 = 22;
 pub const ENOTTY: u32 = 25;
+pub const ENOSPC: u32 = 28;
 pub const ESPIPE: u32 = 29;
 // pub const EROFS: u32 = 30;
 pub const EPIPE: u32 = 32;

+ 2 - 0
src/kernel/mem.rs

@@ -6,9 +6,11 @@ mod allocator;
 mod mm_area;
 mod mm_list;
 mod page_alloc;
+mod page_cache;
 
 pub use access::{AsMemoryBlock, MemoryBlock, PhysAccess};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_kernel_page_fault, FileMapping, MMList, Mapping, Permission};
 pub use page_alloc::{GlobalPageAlloc, RawPage};
+pub use page_cache::{CachePage, PageCache, PageCacheBackend};
 pub use paging::{Page, PageBuffer};

+ 1 - 25
src/kernel/mem/access.rs

@@ -1,11 +1,6 @@
 use core::{num::NonZero, ptr::NonNull};
 use eonix_hal::mm::ArchPhysAccess;
-use eonix_mm::address::{Addr as _, PAddr, PhysAccess as _PhysAccess, VAddr};
-use eonix_mm::paging::PFN;
-
-use super::page_alloc::RawPagePtr;
-
-const PHYS_OFFSET: usize = 0xffff_ff00_0000_0000;
+use eonix_mm::address::{PAddr, PhysAccess as _PhysAccess};
 
 /// A block of memory starting at a non-zero address and having a specific length.
 ///
@@ -141,22 +136,3 @@ impl PhysAccess for PAddr {
         ArchPhysAccess::as_ptr(*self)
     }
 }
-
-pub trait RawPageAccess {
-    /// Translate the address belonged RawPage ptr
-    /// Use it with care.
-    ///
-    /// # Panic
-    /// If the address is not properly aligned.
-    ///
-    /// # Safety
-    /// the address must be kernel accessible pointer
-    unsafe fn as_raw_page(&self) -> RawPagePtr;
-}
-
-impl RawPageAccess for VAddr {
-    unsafe fn as_raw_page(&self) -> RawPagePtr {
-        let pfn: PFN = PAddr::from(self.addr() - PHYS_OFFSET).into();
-        RawPagePtr::from(pfn)
-    }
-}

+ 7 - 8
src/kernel/mem/allocator.rs

@@ -1,13 +1,13 @@
+use super::page_alloc::RawPagePtr;
+use super::{AsMemoryBlock, GlobalPageAlloc, Page};
 use core::alloc::{GlobalAlloc, Layout};
-use eonix_mm::address::VAddr;
+use core::ptr::NonNull;
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_mm::address::PhysAccess;
 use eonix_mm::paging::{PAGE_SIZE_BITS, PFN};
 use eonix_sync::LazyLock;
 use slab_allocator::SlabAllocator;
 
-use super::access::RawPageAccess;
-use super::page_alloc::RawPagePtr;
-use super::{AsMemoryBlock, GlobalPageAlloc, Page};
-
 static SLAB_ALLOCATOR: LazyLock<SlabAllocator<RawPagePtr, GlobalPageAlloc, 9>> =
     LazyLock::new(|| SlabAllocator::new_in(GlobalPageAlloc));
 
@@ -42,9 +42,8 @@ unsafe impl GlobalAlloc for Allocator {
         if size <= 2048 {
             SLAB_ALLOCATOR.dealloc(ptr, size)
         } else {
-            let vaddr = VAddr::from(ptr as usize);
-            let page_ptr = vaddr.as_raw_page();
-            let pfn = PFN::from(page_ptr);
+            let paddr = ArchPhysAccess::from_ptr(NonNull::new_unchecked(ptr));
+            let pfn = PFN::from(paddr);
             Page::from_raw(pfn);
         };
     }

+ 81 - 31
src/kernel/mem/mm_area.rs

@@ -1,18 +1,22 @@
 use super::mm_list::EMPTY_PAGE;
 use super::paging::AllocZeroed as _;
 use super::{AsMemoryBlock, Mapping, Page, Permission};
-use crate::io::ByteBuffer;
-use crate::KResult;
-use core::{borrow::Borrow, cell::UnsafeCell, cmp::Ordering};
+use crate::kernel::constants::EINVAL;
+use crate::prelude::KResult;
+use core::borrow::Borrow;
+use core::cell::UnsafeCell;
+use core::cmp;
 use eonix_mm::address::{AddrOps as _, VAddr, VRange};
 use eonix_mm::page_table::{PageAttribute, RawAttribute, PTE};
-use eonix_mm::paging::PFN;
+use eonix_mm::paging::{PAGE_SIZE, PFN};
+use eonix_runtime::task::Task;
 
 #[derive(Debug)]
 pub struct MMArea {
     range: UnsafeCell<VRange>,
     pub(super) mapping: Mapping,
     pub(super) permission: Permission,
+    pub is_shared: bool,
 }
 
 impl Clone for MMArea {
@@ -21,16 +25,18 @@ impl Clone for MMArea {
             range: UnsafeCell::new(self.range()),
             mapping: self.mapping.clone(),
             permission: self.permission,
+            is_shared: self.is_shared,
         }
     }
 }
 
 impl MMArea {
-    pub fn new(range: VRange, mapping: Mapping, permission: Permission) -> Self {
+    pub fn new(range: VRange, mapping: Mapping, permission: Permission, is_shared: bool) -> Self {
         Self {
             range: range.into(),
             mapping,
             permission,
+            is_shared,
         }
     }
 
@@ -56,9 +62,9 @@ impl MMArea {
         assert!(at.is_page_aligned());
 
         match self.range_borrow().cmp(&VRange::from(at)) {
-            Ordering::Less => (Some(self), None),
-            Ordering::Greater => (None, Some(self)),
-            Ordering::Equal => {
+            cmp::Ordering::Less => (Some(self), None),
+            cmp::Ordering::Greater => (None, Some(self)),
+            cmp::Ordering::Equal => {
                 let diff = at - self.range_borrow().start();
                 if diff == 0 {
                     return (None, Some(self));
@@ -71,6 +77,7 @@ impl MMArea {
                         Mapping::Anonymous => Mapping::Anonymous,
                         Mapping::File(mapping) => Mapping::File(mapping.offset(diff)),
                     },
+                    is_shared: self.is_shared,
                 };
 
                 let new_range = self.range_borrow().shrink(self.range_borrow().end() - at);
@@ -119,43 +126,81 @@ impl MMArea {
 
     /// # Arguments
     /// * `offset`: The offset from the start of the mapping, aligned to 4KB boundary.
-    pub fn handle_mmap(
+    pub async fn handle_mmap(
         &self,
         pfn: &mut PFN,
         attr: &mut PageAttribute,
         offset: usize,
+        write: bool,
     ) -> KResult<()> {
-        // TODO: Implement shared mapping
-        let Mapping::File(mapping) = &self.mapping else {
+        let Mapping::File(file_mapping) = &self.mapping else {
             panic!("Anonymous mapping should not be PA_MMAP");
         };
 
-        assert!(offset < mapping.length, "Offset out of range");
-        unsafe {
-            Page::with_raw(*pfn, |page| {
-                // SAFETY: `page` is marked as mapped, so others trying to read or write to
-                //         it will be blocked and enter the page fault handler, where they will
-                //         be blocked by the mutex held by us.
-                let page_data = page.as_memblk().as_bytes_mut();
+        assert!(offset < file_mapping.length, "Offset out of range");
 
-                let cnt_to_read = (mapping.length - offset).min(0x1000);
-                let cnt_read = mapping.file.read(
-                    &mut ByteBuffer::new(&mut page_data[..cnt_to_read]),
-                    mapping.offset + offset,
-                )?;
-
-                page_data[cnt_read..].fill(0);
+        let Some(page_cache) = file_mapping.file.page_cache() else {
+            panic!("Mapping file should have pagecache");
+        };
 
-                KResult::Ok(())
-            })?;
-        }
+        let file_offset = file_mapping.offset + offset;
+        let cnt_to_read = (file_mapping.length - offset).min(0x1000);
+
+        page_cache
+            .with_page(file_offset, |page, cache_page| {
+                // Non-write faults: we find page in pagecache and do mapping
+                // Write fault: we need to care about shared or private mapping.
+                if !write {
+                    // Bss is embarrassing in pagecache!
+                    // We have to assume cnt_to_read < PAGE_SIZE all bss
+                    if cnt_to_read < PAGE_SIZE {
+                        let new_page = Page::zeroed();
+                        unsafe {
+                            let page_data = new_page.as_memblk().as_bytes_mut();
+                            page_data[..cnt_to_read]
+                                .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]);
+                        }
+                        *pfn = new_page.into_raw();
+                    } else {
+                        *pfn = page.clone().into_raw();
+                    }
+
+                    if self.permission.write {
+                        if self.is_shared {
+                            // The page may will not be written,
+                            // But we simply assume page will be dirty
+                            cache_page.set_dirty();
+                            attr.insert(PageAttribute::WRITE);
+                        } else {
+                            attr.insert(PageAttribute::COPY_ON_WRITE);
+                        }
+                    }
+                } else {
+                    if self.is_shared {
+                        cache_page.set_dirty();
+                        *pfn = page.clone().into_raw();
+                    } else {
+                        let new_page = Page::zeroed();
+                        unsafe {
+                            let page_data = new_page.as_memblk().as_bytes_mut();
+                            page_data[..cnt_to_read]
+                                .copy_from_slice(&page.as_memblk().as_bytes()[..cnt_to_read]);
+                        }
+                        *pfn = new_page.into_raw();
+                    }
+
+                    attr.insert(PageAttribute::WRITE);
+                }
+            })
+            .await?
+            .ok_or(EINVAL)?;
 
         attr.insert(PageAttribute::PRESENT);
         attr.remove(PageAttribute::MAPPED);
         Ok(())
     }
 
-    pub fn handle(&self, pte: &mut impl PTE, offset: usize) -> KResult<()> {
+    pub fn handle(&self, pte: &mut impl PTE, offset: usize, write: bool) -> KResult<()> {
         let mut attr = pte.get_attr().as_page_attr().expect("Not a page attribute");
         let mut pfn = pte.get_pfn();
 
@@ -164,10 +209,15 @@ impl MMArea {
         }
 
         if attr.contains(PageAttribute::MAPPED) {
-            self.handle_mmap(&mut pfn, &mut attr, offset)?;
+            Task::block_on(self.handle_mmap(&mut pfn, &mut attr, offset, write))?;
+        }
+
+        attr.insert(PageAttribute::ACCESSED);
+
+        if write {
+            attr.insert(PageAttribute::DIRTY);
         }
 
-        attr.set(PageAttribute::ACCESSED, true);
         pte.set(pfn, attr.into());
 
         Ok(())

+ 56 - 14
src/kernel/mem/mm_list.rs

@@ -6,6 +6,7 @@ use super::page_alloc::GlobalPageAlloc;
 use super::paging::AllocZeroed as _;
 use super::{AsMemoryBlock, MMArea, Page};
 use crate::kernel::constants::{EEXIST, EFAULT, EINVAL, ENOMEM};
+use crate::kernel::mem::page_alloc::RawPagePtr;
 use crate::{prelude::*, sync::ArcSwap};
 use alloc::collections::btree_set::BTreeSet;
 use core::fmt;
@@ -256,6 +257,7 @@ impl MMListInner<'_> {
         len: usize,
         mapping: Mapping,
         permission: Permission,
+        is_shared: bool,
     ) -> KResult<()> {
         assert_eq!(at.floor(), at);
         assert_eq!(len & (PAGE_SIZE - 1), 0);
@@ -271,13 +273,33 @@ impl MMListInner<'_> {
             Mapping::File(_) => self.page_table.set_mmapped(range, permission),
         }
 
-        self.areas.insert(MMArea::new(range, mapping, permission));
+        self.areas
+            .insert(MMArea::new(range, mapping, permission, is_shared));
         Ok(())
     }
 }
 
 impl Drop for MMListInner<'_> {
     fn drop(&mut self) {
+        // May buggy
+        for area in &self.areas {
+            if area.is_shared {
+                for pte in self.page_table.iter_user(area.range()) {
+                    let (pfn, _) = pte.take();
+                    let raw_page = RawPagePtr::from(pfn);
+                    if raw_page.refcount().fetch_sub(1, Ordering::Relaxed) == 1 {
+                        // Wrong here
+                        // unsafe { Page::from_raw(pfn) };
+                    }
+                }
+            } else {
+                for pte in self.page_table.iter_user(area.range()) {
+                    let (pfn, _) = pte.take();
+                    unsafe { Page::from_raw(pfn) };
+                }
+            }
+        }
+
         // TODO: Recycle all pages in the page table.
     }
 }
@@ -343,9 +365,15 @@ impl MMList {
             let list_inner = list_inner.lock().await;
 
             for area in list_inner.areas.iter() {
-                list_inner
-                    .page_table
-                    .set_copy_on_write(&mut inner.page_table, area.range());
+                if !area.is_shared {
+                    list_inner
+                        .page_table
+                        .set_copy_on_write(&mut inner.page_table, area.range());
+                } else {
+                    list_inner
+                        .page_table
+                        .set_copied(&mut inner.page_table, area.range());
+                }
             }
         }
 
@@ -507,21 +535,22 @@ impl MMList {
         len: usize,
         mapping: Mapping,
         permission: Permission,
+        is_shared: bool,
     ) -> KResult<VAddr> {
         let inner = self.inner.borrow();
         let mut inner = Task::block_on(inner.lock());
 
         if hint == VAddr::NULL {
             let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-            inner.mmap(at, len, mapping, permission)?;
+            inner.mmap(at, len, mapping, permission, is_shared)?;
             return Ok(at);
         }
 
-        match inner.mmap(hint, len, mapping.clone(), permission) {
+        match inner.mmap(hint, len, mapping.clone(), permission, is_shared) {
             Ok(()) => Ok(hint),
             Err(EEXIST) => {
                 let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-                inner.mmap(at, len, mapping, permission)?;
+                inner.mmap(at, len, mapping, permission, is_shared)?;
                 Ok(at)
             }
             Err(err) => Err(err),
@@ -534,9 +563,10 @@ impl MMList {
         len: usize,
         mapping: Mapping,
         permission: Permission,
+        is_shared: bool,
     ) -> KResult<VAddr> {
         Task::block_on(self.inner.borrow().lock())
-            .mmap(at, len, mapping.clone(), permission)
+            .mmap(at, len, mapping.clone(), permission, is_shared)
             .map(|_| at)
     }
 
@@ -571,6 +601,7 @@ impl MMList {
                     write: true,
                     execute: false,
                 },
+                false,
             ));
         }
 
@@ -644,7 +675,8 @@ impl MMList {
                 let page_start = current.floor() + idx * 0x1000;
                 let page_end = page_start + 0x1000;
 
-                area.handle(pte, page_start - area_start)?;
+                // Prepare for the worst case that we might write to the page...
+                area.handle(pte, page_start - area_start, true)?;
 
                 let start_offset;
                 if page_start < current {
@@ -692,6 +724,7 @@ trait PageTableExt {
     fn set_anonymous(&self, range: VRange, permission: Permission);
     fn set_mmapped(&self, range: VRange, permission: Permission);
     fn set_copy_on_write(&self, from: &Self, range: VRange);
+    fn set_copied(&self, from: &Self, range: VRange);
 }
 
 impl PageTableExt for KernelPageTable<'_> {
@@ -715,10 +748,22 @@ impl PageTableExt for KernelPageTable<'_> {
             to.set_copy_on_write(from);
         }
     }
+
+    fn set_copied(&self, from: &Self, range: VRange) {
+        let to_iter = self.iter_user(range);
+        let from_iter = from.iter_user(range);
+
+        for (to, from) in to_iter.zip(from_iter) {
+            let (pfn, attr) = from.get();
+            to.set(pfn, attr);
+        }
+    }
 }
 
 trait PTEExt {
+    // private anonymous
     fn set_anonymous(&mut self, execute: bool);
+    // file mapped or shared anonymous
     fn set_mapped(&mut self, execute: bool);
     fn set_copy_on_write(&mut self, from: &mut Self);
 }
@@ -742,10 +787,7 @@ where
     fn set_mapped(&mut self, execute: bool) {
         // Writable flag is set during page fault handling while executable flag is
         // preserved across page faults, so we set executable flag now.
-        let mut attr = PageAttribute::READ
-            | PageAttribute::USER
-            | PageAttribute::MAPPED
-            | PageAttribute::COPY_ON_WRITE;
+        let mut attr = PageAttribute::READ | PageAttribute::USER | PageAttribute::MAPPED;
         attr.set(PageAttribute::EXECUTE, execute);
 
         self.set(EMPTY_PAGE.clone().into_raw(), T::Attr::from(attr));
@@ -761,7 +803,7 @@ where
             return;
         }
 
-        from_attr.remove(PageAttribute::WRITE);
+        from_attr.remove(PageAttribute::WRITE | PageAttribute::DIRTY);
         from_attr.insert(PageAttribute::COPY_ON_WRITE);
 
         let pfn = unsafe {

+ 14 - 3
src/kernel/mem/mm_list/mapping.rs

@@ -1,23 +1,34 @@
-use crate::kernel::vfs::dentry::Dentry;
+use core::fmt::Debug;
+
+use crate::kernel::vfs::inode::Inode;
 use alloc::sync::Arc;
 use eonix_mm::paging::PAGE_SIZE;
 
 #[derive(Debug, Clone)]
 pub struct FileMapping {
-    pub file: Arc<Dentry>,
+    pub file: Arc<dyn Inode>,
     /// Offset in the file, aligned to 4KB boundary.
     pub offset: usize,
     /// Length of the mapping. Exceeding part will be zeroed.
     pub length: usize,
 }
+
+impl Debug for dyn Inode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "Inode()")
+    }
+}
+
 #[derive(Debug, Clone)]
 pub enum Mapping {
+    // private anonymous memory
     Anonymous,
+    // file-backed memory or shared anonymous memory(tmp file)
     File(FileMapping),
 }
 
 impl FileMapping {
-    pub fn new(file: Arc<Dentry>, offset: usize, length: usize) -> Self {
+    pub fn new(file: Arc<dyn Inode>, offset: usize, length: usize) -> Self {
         assert_eq!(offset & (PAGE_SIZE - 1), 0);
         Self {
             file,

+ 13 - 3
src/kernel/mem/mm_list/page_fault.rs

@@ -90,8 +90,12 @@ impl MMList {
             .next()
             .expect("If we can find the mapped area, we should be able to find the PTE");
 
-        area.handle(pte, addr.floor() - area.range().start())
-            .map_err(|_| Signal::SIGBUS)?;
+        area.handle(
+            pte,
+            addr.floor() - area.range().start(),
+            error.contains(PageFaultErrorCode::Write),
+        )
+        .map_err(|_| Signal::SIGBUS)?;
 
         flush_tlb(addr.floor().addr());
 
@@ -160,9 +164,15 @@ pub fn handle_kernel_page_fault(
         .next()
         .expect("If we can find the mapped area, we should be able to find the PTE");
 
-    if let Err(_) = area.handle(pte, addr.floor() - area.range().start()) {
+    if let Err(_) = area.handle(
+        pte,
+        addr.floor() - area.range().start(),
+        error.contains(PageFaultErrorCode::Write),
+    ) {
         return Some(try_page_fault_fix(fault_pc, addr));
     }
 
+    flush_tlb(addr.floor().addr());
+
     None
 }

+ 54 - 9
src/kernel/mem/page_alloc/raw_page.rs

@@ -1,18 +1,19 @@
+use crate::kernel::mem::page_cache::PageCacheRawPage;
+use crate::kernel::mem::PhysAccess;
 use buddy_allocator::BuddyRawPage;
 use core::{
     ptr::NonNull,
     sync::atomic::{AtomicU32, AtomicUsize, Ordering},
 };
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_mm::paging::PAGE_SIZE;
 use eonix_mm::{
-    address::{PAddr, VAddr},
-    paging::{RawPage as RawPageTrait, PAGE_SIZE, PFN},
+    address::{PAddr, PhysAccess as _},
+    paging::{RawPage as RawPageTrait, PFN},
 };
 use intrusive_list::{container_of, Link};
 use slab_allocator::SlabRawPage;
 
-use crate::kernel::mem::access::RawPageAccess;
-use crate::kernel::mem::PhysAccess;
-
 const PAGE_ARRAY: NonNull<RawPage> =
     unsafe { NonNull::new_unchecked(0xffffff8040000000 as *mut _) };
 
@@ -32,11 +33,16 @@ impl SlabPageInner {
     }
 }
 
+struct PageCacheInner {
+    valid_size: usize,
+}
+
 pub struct BuddyPageInner {}
 
 enum PageType {
     Buddy(BuddyPageInner),
     Slab(SlabPageInner),
+    PageCache(PageCacheInner),
 }
 
 impl PageType {
@@ -47,6 +53,14 @@ impl PageType {
             unreachable!()
         }
     }
+
+    fn page_cache_data(&mut self) -> &mut PageCacheInner {
+        if let PageType::PageCache(cache_data) = self {
+            return cache_data;
+        } else {
+            unreachable!()
+        }
+    }
 }
 
 pub struct RawPage {
@@ -70,8 +84,8 @@ impl PageFlags {
     pub const PRESENT: u32 = 1 << 0;
     // pub const LOCKED: u32 = 1 << 1;
     pub const BUDDY: u32 = 1 << 2;
-    // pub const SLAB: u32 = 1 << 3;
-    // pub const DIRTY: u32 = 1 << 4;
+    pub const SLAB: u32 = 1 << 3;
+    pub const DIRTY: u32 = 1 << 4;
     pub const FREE: u32 = 1 << 5;
     pub const LOCAL: u32 = 1 << 6;
 
@@ -206,8 +220,17 @@ impl SlabRawPage for RawPagePtr {
     }
 
     fn in_which(ptr: *mut u8) -> RawPagePtr {
-        let vaddr = VAddr::from(ptr as usize & !(PAGE_SIZE - 1));
-        unsafe { vaddr.as_raw_page() }
+        unsafe {
+            // SAFETY: The pointer is allocated from the slab allocator,
+            //         which can't be null.
+            let ptr = NonNull::new_unchecked(ptr);
+
+            // SAFETY: The pointer is valid.
+            let paddr = ArchPhysAccess::from_ptr(ptr);
+            let pfn = PFN::from(paddr);
+
+            RawPagePtr::from(pfn)
+        }
     }
 
     fn allocated_count(&self) -> &mut u32 {
@@ -226,3 +249,25 @@ impl SlabRawPage for RawPagePtr {
         self.as_mut().shared_data = PageType::Slab(SlabPageInner::new(first_free));
     }
 }
+
+impl PageCacheRawPage for RawPagePtr {
+    fn valid_size(&self) -> &mut usize {
+        &mut self.as_mut().shared_data.page_cache_data().valid_size
+    }
+
+    fn is_dirty(&self) -> bool {
+        self.flags().has(PageFlags::DIRTY)
+    }
+
+    fn clear_dirty(&self) {
+        self.flags().clear(PageFlags::DIRTY);
+    }
+
+    fn set_dirty(&self) {
+        self.flags().set(PageFlags::DIRTY);
+    }
+
+    fn cache_init(&self) {
+        self.as_mut().shared_data = PageType::PageCache(PageCacheInner { valid_size: 0 });
+    }
+}

+ 324 - 0
src/kernel/mem/page_cache.rs

@@ -0,0 +1,324 @@
+use super::{paging::AllocZeroed, Page};
+use crate::{
+    io::{Buffer, FillResult, Stream},
+    kernel::mem::page_alloc::RawPagePtr,
+    prelude::KResult,
+    GlobalPageAlloc,
+};
+use align_ext::AlignExt;
+use alloc::{collections::btree_map::BTreeMap, sync::Weak};
+use core::mem::ManuallyDrop;
+use eonix_hal::mm::ArchPhysAccess;
+use eonix_mm::{
+    address::{PAddr, PhysAccess},
+    paging::{PageAlloc, RawPage, PAGE_SIZE, PAGE_SIZE_BITS, PFN},
+};
+use eonix_sync::Mutex;
+
+/// Per-object page cache: maps page indices (byte offset >> PAGE_SIZE_BITS)
+/// to cached pages, with reads/writes serviced by a `PageCacheBackend`.
+pub struct PageCache {
+    // Keyed by page index (offset / PAGE_SIZE).
+    pages: Mutex<BTreeMap<usize, CachePage>>,
+    // Weak, presumably to break the cycle with the owning inode/device —
+    // every use upgrades and unwraps (panics if the backend was dropped).
+    backend: Weak<dyn PageCacheBackend>,
+}
+
+// SAFETY(review): no justification given. `CachePage` wraps a raw page
+// pointer, which is why auto-Send/Sync is lost; access is serialized by the
+// `pages` Mutex, but `dyn PageCacheBackend` is not bounded by Send/Sync here —
+// TODO confirm these blanket impls are actually sound.
+unsafe impl Send for PageCache {}
+unsafe impl Sync for PageCache {}
+
+#[derive(Clone, Copy)]
+pub struct CachePage(RawPagePtr);
+
+// A CachePage acts as a `Buffer` so a backend can fill it directly; data is
+// appended at `valid_size`, which serves as the write cursor.
+impl Buffer for CachePage {
+    // Capacity is always one full page.
+    fn total(&self) -> usize {
+        PAGE_SIZE
+    }
+
+    // Bytes written so far == the page's valid-data length.
+    fn wrote(&self) -> usize {
+        self.valid_size()
+    }
+
+    /// Append `data` after the currently valid bytes, advancing `valid_size`.
+    fn fill(&mut self, data: &[u8]) -> KResult<FillResult> {
+        let valid_size = self.valid_size();
+        let available = &mut self.all_mut()[valid_size..];
+        if available.len() == 0 {
+            return Ok(FillResult::Full);
+        }
+
+        let len = core::cmp::min(data.len(), available.len());
+        available[..len].copy_from_slice(&data[..len]);
+
+        *self.0.valid_size() += len;
+
+        if len < data.len() {
+            // The page ran out of room before `data` was exhausted.
+            Ok(FillResult::Partial(len))
+        } else {
+            Ok(FillResult::Done(len))
+        }
+    }
+}
+
+impl CachePage {
+    /// Allocate a fresh cache page. The page contents are NOT zeroed (whatever
+    /// the allocator returns); `cache_init` sets `valid_size` to 0.
+    pub fn new() -> Self {
+        let page = GlobalPageAlloc.alloc().unwrap();
+        page.cache_init();
+        Self(page)
+    }
+
+    /// Allocate a zero-filled cache page with `valid_size` 0.
+    pub fn new_zeroed() -> Self {
+        let page = Page::zeroed();
+        let raw_page_ptr = RawPagePtr::from(page.into_raw());
+
+        raw_page_ptr.cache_init();
+        Self(raw_page_ptr)
+    }
+
+    /// Number of bytes in this page holding meaningful data.
+    pub fn valid_size(&self) -> usize {
+        *self.0.valid_size()
+    }
+
+    pub fn set_valid_size(&mut self, valid_size: usize) {
+        *self.0.valid_size() = valid_size;
+    }
+
+    /// View of the whole page (PAGE_SIZE bytes). NOTE(review): for a page from
+    /// `new()`, bytes past `valid_size` may be uninitialized allocator memory.
+    pub fn all(&self) -> &[u8] {
+        unsafe {
+            core::slice::from_raw_parts(
+                // SAFETY: The page is exclusively owned by us, so we can safely access its data.
+                ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(),
+                PAGE_SIZE,
+            )
+        }
+    }
+
+    /// Mutable view of the whole page (PAGE_SIZE bytes).
+    pub fn all_mut(&mut self) -> &mut [u8] {
+        unsafe {
+            core::slice::from_raw_parts_mut(
+                // SAFETY: The page is exclusively owned by us, so we can safely access its data.
+                ArchPhysAccess::as_ptr(PAddr::from(PFN::from(self.0))).as_ptr(),
+                PAGE_SIZE,
+            )
+        }
+    }
+
+    /// Only the meaningful prefix of the page.
+    pub fn valid_data(&self) -> &[u8] {
+        &self.all()[..self.valid_size()]
+    }
+
+    pub fn is_dirty(&self) -> bool {
+        self.0.is_dirty()
+    }
+
+    pub fn set_dirty(&self) {
+        self.0.set_dirty();
+    }
+
+    pub fn clear_dirty(&self) {
+        self.0.clear_dirty();
+    }
+}
+
+impl PageCache {
+    /// Create a cache over `backend`. The backend is held weakly; every
+    /// access below does `upgrade().unwrap()` and will panic if the backend
+    /// has already been dropped.
+    pub fn new(backend: Weak<dyn PageCacheBackend>) -> Self {
+        Self {
+            pages: Mutex::new(BTreeMap::new()),
+            backend: backend,
+        }
+    }
+
+    /// Read starting at byte `offset`, filling `buffer` until it stops
+    /// accepting data or a page with no valid bytes (EOF) is reached.
+    /// Missing pages are read in from the backend on demand.
+    pub async fn read(&self, buffer: &mut dyn Buffer, mut offset: usize) -> KResult<usize> {
+        let mut pages = self.pages.lock().await;
+
+        loop {
+            let page_id = offset >> PAGE_SIZE_BITS;
+            let page = pages.get(&page_id);
+
+            match page {
+                Some(page) => {
+                    let inner_offset = offset % PAGE_SIZE;
+
+                    // TODO: still cause unnecessary IO if valid_size < PAGESIZE
+                    //       and fill result is Done
+                    // NOTE(review): if inner_offset > page.valid_size(), the
+                    // slice below panics — confirm callers never start a read
+                    // past the valid region within a page.
+                    if page.valid_size() == 0
+                        || buffer
+                            .fill(&page.valid_data()[inner_offset..])?
+                            .should_stop()
+                        || buffer.available() == 0
+                    {
+                        break;
+                    }
+
+                    // Advance to the start of the next page.
+                    offset += PAGE_SIZE - inner_offset;
+                }
+                None => {
+                    // Cache miss: populate the page, then retry this offset on
+                    // the next loop iteration.
+                    let mut new_page = CachePage::new();
+                    self.backend
+                        .upgrade()
+                        .unwrap()
+                        .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                    pages.insert(page_id, new_page);
+                }
+            }
+        }
+
+        Ok(buffer.wrote())
+    }
+
+    /// Write `stream` into the cache starting at byte `offset`, marking the
+    /// touched pages dirty. Pages wholly beyond the backend's current size
+    /// are created zeroed instead of being read in.
+    pub async fn write(&self, stream: &mut dyn Stream, mut offset: usize) -> KResult<usize> {
+        let mut pages = self.pages.lock().await;
+        let old_size = self.backend.upgrade().unwrap().size();
+        let mut wrote = 0;
+
+        loop {
+            let page_id = offset >> PAGE_SIZE_BITS;
+            let page = pages.get_mut(&page_id);
+
+            match page {
+                Some(page) => {
+                    let inner_offset = offset % PAGE_SIZE;
+                    let cursor_end = match stream.poll_data(&mut page.all_mut()[inner_offset..])? {
+                        Some(buf) => {
+                            wrote += buf.len();
+                            inner_offset + buf.len()
+                        }
+                        None => {
+                            // Stream exhausted.
+                            break;
+                        }
+                    };
+
+                    // Grow the valid region if we wrote past its old end.
+                    if page.valid_size() < cursor_end {
+                        page.set_valid_size(cursor_end);
+                    }
+                    page.set_dirty();
+                    offset += PAGE_SIZE - inner_offset;
+                }
+                None => {
+                    let new_page = if (offset >> PAGE_SIZE_BITS) > (old_size >> PAGE_SIZE_BITS) {
+                        // Entirely past the old end: nothing to read back.
+                        let new_page = CachePage::new_zeroed();
+                        new_page
+                    } else {
+                        let mut new_page = CachePage::new();
+                        self.backend
+                            .upgrade()
+                            .unwrap()
+                            .read_page(&mut new_page, offset.align_down(PAGE_SIZE))?;
+                        new_page
+                    };
+
+                    pages.insert(page_id, new_page);
+                }
+            }
+        }
+
+        Ok(wrote)
+    }
+
+    /// Write every dirty page back to the backend and clear its dirty flag.
+    pub async fn fsync(&self) -> KResult<()> {
+        let pages = self.pages.lock().await;
+        for (page_id, page) in pages.iter() {
+            if page.is_dirty() {
+                self.backend
+                    .upgrade()
+                    .unwrap()
+                    .write_page(page, page_id << PAGE_SIZE_BITS)?;
+                page.clear_dirty();
+            }
+        }
+        Ok(())
+    }
+
+    // This function is used for extend write or truncate
+    pub async fn resize(&self, new_size: usize) -> KResult<()> {
+        let mut pages = self.pages.lock().await;
+        let old_size = self.backend.upgrade().unwrap().size();
+
+        if new_size < old_size {
+            // Truncate: drop every page from the one containing new_size to
+            // the old end. NOTE(review): the page containing new_size is
+            // dropped entirely, discarding its still-valid leading bytes —
+            // confirm this is intended.
+            let begin = new_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS;
+            let end = old_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS;
+
+            for page_id in begin..end {
+                pages.remove(&page_id);
+            }
+        } else if new_size > old_size {
+            let begin = old_size.align_down(PAGE_SIZE) >> PAGE_SIZE_BITS;
+            let end = new_size.align_up(PAGE_SIZE) >> PAGE_SIZE_BITS;
+
+            // NOTE(review): this replaces the old partial last page with a
+            // zeroed page, discarding any cached (possibly dirty) data in it.
+            pages.remove(&begin);
+
+            for page_id in begin..end {
+                let mut new_page = CachePage::new_zeroed();
+
+                if page_id != end - 1 {
+                    new_page.set_valid_size(PAGE_SIZE);
+                } else {
+                    // FIXME(review): when new_size is PAGE_SIZE-aligned,
+                    // new_size % PAGE_SIZE == 0, so the last page ends up with
+                    // valid_size 0 instead of PAGE_SIZE.
+                    new_page.set_valid_size(new_size % PAGE_SIZE);
+                }
+                new_page.set_dirty();
+                pages.insert(page_id, new_page);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Run `func` with the page containing `offset`, loading it from the
+    /// backend on a miss. Returns Ok(None) when `offset` is past the backend
+    /// size.
+    pub async fn with_page<F, O>(&self, offset: usize, func: F) -> KResult<Option<O>>
+    where
+        F: FnOnce(&Page, &CachePage) -> O,
+    {
+        let offset_aligin = offset.align_down(PAGE_SIZE);
+        let page_id = offset_aligin >> PAGE_SIZE_BITS;
+        let size = self.backend.upgrade().unwrap().size();
+
+        if offset_aligin > size {
+            return Ok(None);
+        }
+
+        let mut pages = self.pages.lock().await;
+
+        let raw_page_ptr = match pages.get(&page_id) {
+            Some(CachePage(raw_page_ptr)) => *raw_page_ptr,
+            None => {
+                let mut new_page = CachePage::new();
+                self.backend
+                    .upgrade()
+                    .unwrap()
+                    .read_page(&mut new_page, offset_aligin)?;
+                pages.insert(page_id, new_page);
+                new_page.0
+            }
+        };
+
+        unsafe {
+            // The temporary Page is wrapped in ManuallyDrop so that dropping
+            // it does not release the cached page — presumably Page's Drop
+            // would free it; TODO confirm.
+            let page = ManuallyDrop::new(Page::from_raw_unchecked(PFN::from(raw_page_ptr)));
+
+            Ok(Some(func(&page, &CachePage(raw_page_ptr))))
+        }
+    }
+}
+
+// With this trait, "page cache" and "block cache" are unified:
+// - for a filesystem, `offset` is a file offset (floor-aligned to PAGE_SIZE);
+// - for a block device, `offset` is a block index (floor-aligned to
+//   PAGE_SIZE / BLK_SIZE).
+// NOTE(review, from original): this unification may create unnecessary cache.
+pub trait PageCacheBackend {
+    /// Fill `page` with the data at `offset`; returns — presumably — the
+    /// number of bytes read.
+    fn read_page(&self, page: &mut CachePage, offset: usize) -> KResult<usize>;
+
+    /// Write `page`'s contents back at `offset`; returns — presumably — the
+    /// number of bytes written.
+    fn write_page(&self, page: &CachePage, offset: usize) -> KResult<usize>;
+
+    /// Current size of the backing object, in bytes.
+    fn size(&self) -> usize;
+}
+
+/// Raw-page operations required by the page-cache layer.
+pub trait PageCacheRawPage: RawPage {
+    /// Mutable access to the page's valid-data length.
+    /// NOTE(review): yields `&mut` from `&self`; implementations rely on
+    /// interior mutation of the raw page — confirm exclusivity at call sites.
+    fn valid_size(&self) -> &mut usize;
+
+    fn is_dirty(&self) -> bool;
+
+    fn set_dirty(&self);
+
+    fn clear_dirty(&self);
+
+    /// Initialize the page's shared data for page-cache use.
+    fn cache_init(&self);
+}
+
+impl Drop for PageCache {
+    fn drop(&mut self) {
+        // FIXME(review): `fsync` is an `async fn` (see above); calling it here
+        // only constructs a future, which `let _ =` immediately drops without
+        // ever polling it — so dirty pages are NOT written back on drop.
+        // Either block on the future here or require an explicit flush before
+        // the cache is dropped.
+        let _ = self.fsync();
+    }
+}

+ 2 - 2
src/kernel/pcie.rs

@@ -4,8 +4,8 @@ mod error;
 mod header;
 mod init;
 
-pub use device::PCIDevice;
+pub use device::{PCIDevice, SegmentGroup};
 pub use driver::{register_driver, PCIDriver};
 pub use error::PciError;
-pub use header::{CommonHeader, Header};
+pub use header::{Bar, CommonHeader, Header};
 pub use init::init_pcie;

+ 111 - 45
src/kernel/pcie/device.rs

@@ -1,22 +1,18 @@
-use super::{CommonHeader, Header};
+use super::{
+    header::{Bar, Command},
+    CommonHeader, Header,
+};
 use crate::kernel::mem::PhysAccess as _;
-use alloc::sync::Arc;
-use core::{ops::RangeInclusive, sync::atomic::Ordering};
-use eonix_hal::fence::memory_barrier;
-use eonix_mm::address::PAddr;
-use eonix_sync::{LazyLock, Spin};
-use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink};
+use align_ext::AlignExt;
+use alloc::{collections::btree_map::BTreeMap, sync::Arc, vec::Vec};
+use core::{num::NonZero, ops::RangeInclusive};
+use eonix_mm::address::{Addr, PAddr, PRange};
+use eonix_sync::Spin;
 
-pub(super) static PCIE_DEVICES: LazyLock<Spin<RBTree<PCIDeviceAdapter>>> =
-    LazyLock::new(|| Spin::new(RBTree::new(PCIDeviceAdapter::new())));
+pub(super) static PCIE_DEVICES: Spin<BTreeMap<u32, Vec<Arc<PCIDevice>>>> =
+    Spin::new(BTreeMap::new());
 
-intrusive_adapter!(
-    pub PCIDeviceAdapter = Arc<PCIDevice<'static>> : PCIDevice { link: RBTreeAtomicLink }
-);
-
-#[allow(dead_code)]
 pub struct PCIDevice<'a> {
-    link: RBTreeAtomicLink,
     segment_group: SegmentGroup,
     config_space: ConfigSpace,
     pub header: Header<'a>,
@@ -32,14 +28,18 @@ pub struct SegmentGroup {
     base_address: PAddr,
 }
 
-#[allow(dead_code)]
 #[derive(Clone)]
 pub struct ConfigSpace {
-    bus: u8,
-    device: u8,
-    function: u8,
+    pub bus: u8,
+    pub device: u8,
+    pub function: u8,
 
-    base: PAddr,
+    pub base: PAddr,
+}
+
+pub struct PciMemoryAllocator {
+    start: u32,
+    end: u32,
 }
 
 impl SegmentGroup {
@@ -66,20 +66,30 @@ impl SegmentGroup {
             .map(move |bus| {
                 (0..32)
                     .map(move |device| {
-                        (0..8).map(move |function| ConfigSpace {
-                            bus,
-                            device,
-                            function,
-                            base: self.base_address
-                                + ((bus as usize) << 20)
-                                + ((device as usize) << 15)
-                                + ((function as usize) << 12),
+                        (0..8).map(move |function| {
+                            self.get_conf_space(bus, device, function).unwrap()
                         })
                     })
                     .flatten()
             })
             .flatten()
     }
+
+    pub fn get_conf_space(&self, bus: u8, device: u8, function: u8) -> Option<ConfigSpace> {
+        if self.bus_range.contains(&bus) {
+            Some(ConfigSpace {
+                bus,
+                device,
+                function,
+                base: self.base_address
+                    + ((bus as usize) << 20)
+                    + ((device as usize) << 15)
+                    + ((function as usize) << 12),
+            })
+        } else {
+            None
+        }
+    }
 }
 
 impl ConfigSpace {
@@ -119,36 +129,92 @@ impl PCIDevice<'static> {
         config_space: ConfigSpace,
         header: Header<'static>,
     ) -> Arc<Self> {
-        let common_header = header.common_header();
-
         Arc::new(PCIDevice {
-            link: RBTreeAtomicLink::new(),
             segment_group,
             config_space,
-            vendor_id: common_header.vendor_id,
-            device_id: common_header.device_id,
+            vendor_id: header.vendor_id,
+            device_id: header.device_id,
             header,
         })
     }
+
+    pub fn vendor_device(&self) -> u32 {
+        (self.vendor_id as u32) << 16 | self.device_id as u32
+    }
 }
 
-#[allow(dead_code)]
 impl PCIDevice<'_> {
-    pub fn enable_bus_mastering(&self) {
-        let header = self.header.common_header();
-        header.command().fetch_or(0x04, Ordering::Relaxed);
+    pub fn configure_io(&self, allocator: &mut PciMemoryAllocator) {
+        self.header
+            .command()
+            .clear(Command::IO_ACCESS_ENABLE | Command::MEMORY_ACCESS_ENABLE);
+
+        if let Header::Endpoint(header) = self.header {
+            for mut bar in header.bars().iter() {
+                match bar.get() {
+                    Bar::MemoryMapped32 { base: None, size } => bar.set(Bar::MemoryMapped32 {
+                        base: Some(
+                            allocator
+                                .allocate(size as usize)
+                                .expect("Failed to allocate BAR memory"),
+                        ),
+                        size,
+                    }),
+                    Bar::MemoryMapped64 { base: None, size } => bar.set(Bar::MemoryMapped64 {
+                        base: Some(
+                            allocator
+                                .allocate(size as usize)
+                                .map(|base| NonZero::new(base.get() as u64))
+                                .flatten()
+                                .expect("Failed to allocate BAR memory"),
+                        ),
+                        size,
+                    }),
+                    _ => {}
+                }
+            }
+        }
+
+        self.header.command().set(
+            Command::IO_ACCESS_ENABLE | Command::MEMORY_ACCESS_ENABLE | Command::BUS_MASTER_ENABLE,
+        );
+    }
 
-        memory_barrier();
+    pub fn config_space(&self) -> &ConfigSpace {
+        &self.config_space
+    }
+
+    pub fn segment_group(&self) -> &SegmentGroup {
+        &self.segment_group
     }
 }
 
-impl<'a> KeyAdapter<'a> for PCIDeviceAdapter {
-    type Key = u32;
+// Simple bump allocator carving BAR memory out of a fixed physical range.
+impl PciMemoryAllocator {
+    /// Build an allocator over `range`; addresses are truncated to u32
+    /// (32-bit PCI memory space).
+    pub fn new(range: PRange) -> Self {
+        let start = range.start().addr() as u32;
+        let end = range.end().addr() as u32;
+
+        Self { start, end }
+    }
+
+    /// Allocate `size` bytes (rounded up to a power of two) of BAR memory.
+    pub fn allocate(&mut self, size: usize) -> Option<NonZero<u32>> {
+        let size = size.next_power_of_two().try_into().ok()?;
+        let real_start = self.start.align_up(size);
+
+        // NOTE(review): if real_start > self.end, this subtraction underflows
+        // (panics in debug builds) — consider a checked_sub here.
+        if size == 0 || size > self.end - real_start {
+            return None;
+        }
+
+        // FIXME(review): `real_start` (aligned up to `size`) is only used for
+        // the bounds check; the allocation hands out the UNALIGNED
+        // `self.start`, so returned bases are not size-aligned as
+        // power-of-two BARs require. `base` should presumably be `real_start`
+        // with `self.start = real_start + size`.
+        let base = self.start;
+        self.start += size;
+
+        eonix_log::println_trace!(
+            "trace_pci",
+            "PciMemoryAllocator: Allocated {} bytes at {:#x}",
+            size,
+            base
+        );

-    fn get_key(
-        &self,
-        value: &'a <Self::PointerOps as intrusive_collections::PointerOps>::Value,
-    ) -> Self::Key {
-        ((value.vendor_id as u32) << 16) | value.device_id as u32
+        NonZero::new(base)
     }
 }

+ 5 - 3
src/kernel/pcie/driver.rs

@@ -28,9 +28,11 @@ pub fn register_driver(driver: impl PCIDriver + 'static) -> KResult<()> {
         btree_map::Entry::Occupied(_) => Err(EEXIST)?,
     };
 
-    let device = PCIE_DEVICES.lock().find(&index).clone_pointer();
-    if let Some(device) = device {
-        driver.handle_device(device)?;
+    let devices = PCIE_DEVICES.lock().get(&index).cloned();
+    if let Some(devices) = devices {
+        for device in devices {
+            driver.handle_device(device)?;
+        }
     };
 
     Ok(())

+ 321 - 19
src/kernel/pcie/header.rs

@@ -1,4 +1,118 @@
-use core::{mem::offset_of, ops::Deref, sync::atomic::AtomicU32};
+use bitflags::bitflags;
+use core::{
+    marker::PhantomData,
+    num::NonZero,
+    ops::{BitAnd, BitOr, Deref, Not},
+    sync::atomic::{AtomicU16, AtomicU32, Ordering},
+};
+use eonix_hal::fence::memory_barrier;
+
+pub trait BitFlag: Sized + Copy {
+    type Value: BitAnd<Output = Self::Value>
+        + BitOr<Output = Self::Value>
+        + Not<Output = Self::Value>;
+
+    fn from_bits(value: Self::Value) -> Self;
+    fn into_bits(self) -> Self::Value;
+
+    fn and(self, other: Self) -> Self {
+        Self::from_bits(self.into_bits() & other.into_bits())
+    }
+
+    fn or(self, other: Self) -> Self {
+        Self::from_bits(self.into_bits() | other.into_bits())
+    }
+
+    fn not(self) -> Self {
+        Self::from_bits(!self.into_bits())
+    }
+}
+
+pub struct Register<'a, T>
+where
+    T: BitFlag,
+{
+    register: &'a AtomicU16,
+    _phantom: PhantomData<T>,
+}
+
+bitflags! {
+    #[derive(Debug, Clone, Copy)]
+    pub struct Command: u16 {
+        /// I/O Access Enable.
+        const IO_ACCESS_ENABLE = 1 << 0;
+        /// Memory Access Enable.
+        const MEMORY_ACCESS_ENABLE = 1 << 1;
+        /// Bus Master Enable.
+        const BUS_MASTER_ENABLE = 1 << 2;
+        /// Special Cycle Enable.
+        const SPECIAL_CYCLE_ENABLE = 1 << 3;
+        /// Memory Write and Invalidate Enable.
+        const MEMORY_WRITE_AND_INVALIDATE_ENABLE = 1 << 4;
+        /// Palette Snooping Enable.
+        const PALETTE_SNOOPING_ENABLE = 1 << 5;
+        /// Parity Error Response Enable.
+        const PARITY_ERROR_RESPONSE_ENABLE = 1 << 6;
+        /// SERR# Enable.
+        const SERR_ENABLE = 1 << 8;
+        /// Fast Back-to-Back Enable.
+        const FAST_BACK_TO_BACK_ENABLE = 1 << 9;
+        /// Interrupt Disable.
+        const INTERRUPT_DISABLE = 1 << 10;
+    }
+
+    #[derive(Debug, Clone, Copy)]
+    pub struct Status: u16 {
+        /// Interrupt Status.
+        const INTERRUPT_STATUS = 1 << 3;
+        /// Capabilities List.
+        const CAPABILITIES_LIST = 1 << 4;
+        /// 66 MHz Capable.
+        const SIXTY_SIX_MHZ_CAPABLE = 1 << 5;
+        /// Fast Back-to-Back Capable.
+        const FAST_BACK_TO_BACK_CAPABLE = 1 << 7;
+        /// Master Data Parity Error.
+        const MASTER_DATA_PARITY_ERROR = 1 << 8;
+        /// Device Select Timing.
+        const DEVICE_SELECT_TIMING = (1 << 9) | (1 << 10);
+        /// Signaled Target Abort.
+        const SIGNALLED_TARGET_ABORT = 1 << 11;
+        /// Received Target Abort.
+        const RECEIVED_TARGET_ABORT = 1 << 12;
+        /// Received Master Abort.
+        const RECEIVED_MASTER_ABORT = 1 << 13;
+        /// Signaled System Error.
+        const SIGNALLED_SYSTEM_ERROR = 1 << 14;
+        /// Detected Parity Error.
+        const DETECTED_PARITY_ERROR = 1 << 15;
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum Bar {
+    IOMapped {
+        base: Option<NonZero<u32>>,
+        size: u32,
+    },
+    MemoryMapped32 {
+        base: Option<NonZero<u32>>,
+        size: u32,
+    },
+    MemoryMapped64 {
+        base: Option<NonZero<u64>>,
+        size: u64,
+    },
+    None,
+}
+
+pub struct BarsEntry<'a> {
+    reg1: &'a AtomicU32,
+    reg2: Option<&'a AtomicU32>,
+}
+
+pub struct Bars<'a> {
+    bars: &'a [AtomicU32],
+}
 
 #[repr(C)]
 pub struct CommonHeader {
@@ -25,7 +139,7 @@ pub struct CommonHeader {
 #[repr(C)]
 pub struct Endpoint {
     _header: CommonHeader,
-    pub bars: [u32; 6],
+    _bars: [AtomicU32; 6],
     pub cardbus_cis_pointer: u32,
     pub subsystem_vendor_id: u16,
     pub subsystem_id: u16,
@@ -43,31 +157,219 @@ pub enum Header<'a> {
     Endpoint(&'a Endpoint),
 }
 
-#[allow(dead_code)]
+impl BitFlag for Command {
+    type Value = u16;
+
+    fn from_bits(value: Self::Value) -> Self {
+        Command::from_bits_retain(value)
+    }
+
+    fn into_bits(self) -> Self::Value {
+        self.bits()
+    }
+}
+
+impl BitFlag for Status {
+    type Value = u16;
+
+    fn from_bits(value: Self::Value) -> Self {
+        Status::from_bits_retain(value)
+    }
+
+    fn into_bits(self) -> Self::Value {
+        self.bits()
+    }
+}
+
+impl<T> Register<'_, T>
+where
+    T: BitFlag<Value = u16>,
+{
+    pub fn read(&self) -> T {
+        memory_barrier();
+        let value = self.register.load(Ordering::Relaxed);
+        memory_barrier();
+
+        T::from_bits(value)
+    }
+
+    pub fn write(&self, value: T) {
+        memory_barrier();
+        self.register.store(value.into_bits(), Ordering::Relaxed);
+        memory_barrier();
+    }
+
+    pub fn set(&self, value: T) {
+        memory_barrier();
+        let current = self.read();
+        self.write(current.or(value));
+        memory_barrier();
+    }
+
+    pub fn clear(&self, value: T) {
+        memory_barrier();
+        let current = self.read();
+        self.write(current.and(value.not()));
+        memory_barrier();
+    }
+}
+
 impl CommonHeader {
-    pub fn command(&self) -> &AtomicU32 {
-        unsafe {
-            AtomicU32::from_ptr(
-                (&raw const *self)
-                    .byte_offset(offset_of!(CommonHeader, _command) as isize)
-                    .cast::<u32>() as *mut u32,
-            )
+    pub fn command(&self) -> Register<Command> {
+        Register {
+            register: unsafe { AtomicU16::from_ptr((&raw const self._command) as *mut u16) },
+            _phantom: PhantomData,
         }
     }
 
-    pub fn status(&self) -> &AtomicU32 {
-        unsafe {
-            AtomicU32::from_ptr(
-                (&raw const *self)
-                    .byte_offset(offset_of!(CommonHeader, _status) as isize)
-                    .cast::<u32>() as *mut u32,
-            )
+    pub fn status(&self) -> Register<Status> {
+        Register {
+            register: unsafe { AtomicU16::from_ptr((&raw const self._status) as *mut u16) },
+            _phantom: PhantomData,
         }
     }
 }
 
-impl Header<'_> {
-    pub fn common_header(&self) -> &CommonHeader {
+impl Bars<'_> {
+    pub fn iter(&self) -> impl Iterator<Item = BarsEntry> + '_ {
+        struct BarsIterator<'a> {
+            bars: &'a [AtomicU32],
+            pos: usize,
+        }
+
+        impl<'a> Iterator for BarsIterator<'a> {
+            type Item = BarsEntry<'a>;
+
+            fn next(&mut self) -> Option<Self::Item> {
+                if self.pos >= self.bars.len() {
+                    return None;
+                }
+
+                let reg1 = &self.bars[self.pos];
+                let is_64bit = (reg1.load(Ordering::Relaxed) & 4) == 4;
+
+                let reg2 = if is_64bit {
+                    self.pos += 1;
+                    Some(&self.bars[self.pos])
+                } else {
+                    None
+                };
+
+                self.pos += 1;
+
+                Some(BarsEntry { reg1, reg2 })
+            }
+        }
+
+        BarsIterator {
+            bars: self.bars,
+            pos: 0,
+        }
+    }
+}
+
+impl BarsEntry<'_> {
+    pub fn get(&self) -> Bar {
+        let reg1_value = self.reg1.load(Ordering::Relaxed);
+        let is_mmio = (reg1_value & 1) == 0;
+        let is_64bit = (reg1_value & 4) == 4;
+
+        if !is_mmio {
+            self.reg1.store(!0, Ordering::Relaxed);
+            memory_barrier();
+
+            let size = NonZero::new(self.reg1.load(Ordering::Relaxed) & !0x3);
+            let bar = size.map(|size| Bar::IOMapped {
+                base: NonZero::new(reg1_value & !0x3),
+                size: !size.get() + 1,
+            });
+
+            self.reg1.store(reg1_value, Ordering::Relaxed);
+            memory_barrier();
+
+            return bar.unwrap_or(Bar::None);
+        }
+
+        if is_64bit {
+            let reg2 = self.reg2.expect("64-bit BARs must have a second register");
+
+            let reg2_value = reg2.load(Ordering::Relaxed);
+
+            self.reg1.store(!0, Ordering::Relaxed);
+            reg2.store(!0, Ordering::Relaxed);
+            memory_barrier();
+
+            let size = (self.reg1.load(Ordering::Relaxed) as u64 & !0xf)
+                | ((reg2.load(Ordering::Relaxed) as u64) << 32);
+            let bar = NonZero::new(size).map(|size| Bar::MemoryMapped64 {
+                base: NonZero::new((reg1_value as u64 & !0xf) | ((reg2_value as u64) << 32)),
+                size: !size.get() + 1,
+            });
+
+            self.reg1.store(reg1_value, Ordering::Relaxed);
+            reg2.store(reg2_value, Ordering::Relaxed);
+            memory_barrier();
+
+            bar.unwrap_or(Bar::None)
+        } else {
+            self.reg1.store(!0, Ordering::Relaxed);
+            memory_barrier();
+
+            let size = NonZero::new(self.reg1.load(Ordering::Relaxed) & !0xf);
+            let bar = size.map(|size| Bar::MemoryMapped32 {
+                base: NonZero::new(reg1_value & !0xf),
+                size: !size.get() + 1,
+            });
+
+            self.reg1.store(reg1_value, Ordering::Relaxed);
+            memory_barrier();
+
+            bar.unwrap_or(Bar::None)
+        }
+    }
+
+    /// Program the BAR base address, preserving the read-only low
+    /// type/attribute bits of the first register.
+    pub fn set(&mut self, bar: Bar) {
+        match bar {
+            Bar::None => panic!("Cannot set a BAR to None"),
+            Bar::IOMapped { base, .. } => {
+                let old_value = self.reg1.load(Ordering::Relaxed);
+                // I/O BARs keep the low 2 attribute bits.
+                let base = base.map_or(0, NonZero::get) & !0x3;
+
+                self.reg1.store((old_value & 0x3) | base, Ordering::Relaxed);
+                memory_barrier();
+            }
+            Bar::MemoryMapped32 { base, .. } => {
+                let old_value = self.reg1.load(Ordering::Relaxed);
+                // Memory BARs keep the low 4 attribute bits.
+                let base = base.map_or(0, NonZero::get) & !0xf;
+
+                self.reg1.store((old_value & 0xf) | base, Ordering::Relaxed);
+                memory_barrier();
+            }
+            Bar::MemoryMapped64 { base, .. } => {
+                let reg2 = self.reg2.expect("64-bit BARs must have a second register");
+                let old_value1 = self.reg1.load(Ordering::Relaxed);
+                let old_value2 = reg2.load(Ordering::Relaxed);
+                let base = base.map_or(0, NonZero::get) & !0xf;
+
+                self.reg1
+                    .store((old_value1 & 0xf) | base as u32, Ordering::Relaxed);
+                // FIXME(review): the high dword is OR-ed with its stale old
+                // value instead of being replaced (contrast reg1 above, which
+                // masks to the low attribute bits only). Any previously
+                // programmed high bits corrupt the new base; this should be
+                // `reg2.store((base >> 32) as u32, Ordering::Relaxed)`.
+                reg2.store(old_value2 | (base >> 32) as u32, Ordering::Relaxed);
+                memory_barrier();
+            }
+        }
+    }
+}
+
+impl Endpoint {
+    pub fn bars(&self) -> Bars<'_> {
+        Bars { bars: &self._bars }
+    }
+}
+
+impl Deref for Header<'_> {
+    type Target = CommonHeader;
+
+    fn deref(&self) -> &Self::Target {
         match self {
             Header::Unknown(header) => header,
             Header::Endpoint(header) => &header,

+ 59 - 7
src/kernel/pcie/init.rs

@@ -2,8 +2,11 @@ use super::{
     device::{PCIDevice, SegmentGroup, PCIE_DEVICES},
     error::PciError,
 };
-use crate::kernel::mem::PhysAccess as _;
+use crate::kernel::{mem::PhysAccess as _, pcie::device::PciMemoryAllocator};
 use acpi::{AcpiHandler, PhysicalMapping};
+use alloc::collections::btree_map::Entry;
+use alloc::vec;
+use eonix_log::println_trace;
 use eonix_mm::address::PAddr;
 
 #[derive(Clone)]
@@ -30,12 +33,15 @@ pub fn init_pcie() -> Result<(), PciError> {
     #[cfg(target_arch = "x86_64")]
     {
         use acpi::{AcpiTables, PciConfigRegions};
+        use eonix_mm::address::PAddr;
 
         let acpi_tables = unsafe {
             // SAFETY: Our impl should be correct.
             AcpiTables::search_for_rsdp_bios(AcpiHandlerImpl)?
         };
 
+        let mut allocator = PciMemoryAllocator::new(PRange::from(PAddr::from(0)));
+
         let conf_regions = PciConfigRegions::new(&acpi_tables)?;
         for region in conf_regions.iter() {
             let segment_group = SegmentGroup::from_entry(&region);
@@ -44,19 +50,57 @@ pub fn init_pcie() -> Result<(), PciError> {
                 if let Some(header) = config_space.header() {
                     let pci_device = PCIDevice::new(segment_group.clone(), config_space, header);
 
-                    PCIE_DEVICES.lock().insert(pci_device);
+                    pci_device.configure_io(&mut allocator);
+
+                    match PCIE_DEVICES.lock().entry(pci_device.vendor_device()) {
+                        Entry::Vacant(vacant_entry) => {
+                            vacant_entry.insert(vec![pci_device]);
+                        }
+                        Entry::Occupied(mut occupied_entry) => {
+                            occupied_entry.get_mut().push(pci_device);
+                        }
+                    }
                 }
             }
         }
     }
 
-    #[cfg(target_arch = "riscv64")]
+    #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
     {
-        use crate::kernel::constants::{EINVAL, ENOENT};
+        use crate::kernel::constants::{EINVAL, EIO, ENOENT};
         use eonix_hal::arch_exported::fdt::FDT;
         use eonix_mm::address::PRange;
 
-        let pcie_node = FDT.find_node("/soc/pci").ok_or(ENOENT)?;
+        let pcie_node = FDT
+            .find_compatible(&["pci-host-ecam-generic"])
+            .ok_or(ENOENT)?;
+
+        let mmio_range = {
+            let ranges = pcie_node.property("ranges").ok_or(EIO)?;
+            ranges
+                .value
+                .chunks(28)
+                .map(|entry| {
+                    let pci_address = u64::from_be_bytes(entry[4..12].try_into().unwrap());
+                    let cpu_address = u64::from_be_bytes(entry[12..20].try_into().unwrap());
+                    let size = u64::from_be_bytes(entry[20..28].try_into().unwrap());
+
+                    println_trace!(
+                        "trace_pci",
+                        "PCIe range: PCI address = {:#x}, CPU address = {:#x}, size = {:#x}",
+                        pci_address,
+                        cpu_address,
+                        size
+                    );
+
+                    PRange::from(PAddr::from(cpu_address as usize)).grow(size as usize)
+                })
+                .max_by(|lhs, rhs| lhs.len().cmp(&rhs.len()))
+                .expect("No valid PCIe range found")
+        };
+
+        let mut allocator = PciMemoryAllocator::new(mmio_range);
+
         let bus_range = pcie_node.property("bus-range").ok_or(ENOENT)?;
         let reg = pcie_node.reg().ok_or(EINVAL)?.next().ok_or(EINVAL)?;
 
@@ -81,8 +125,16 @@ pub fn init_pcie() -> Result<(), PciError> {
         for config_space in segment_group.iter() {
             if let Some(header) = config_space.header() {
                 let pci_device = PCIDevice::new(segment_group.clone(), config_space, header);
-
-                PCIE_DEVICES.lock().insert(pci_device);
+                pci_device.configure_io(&mut allocator);
+
+                match PCIE_DEVICES.lock().entry(pci_device.vendor_device()) {
+                    Entry::Vacant(vacant_entry) => {
+                        vacant_entry.insert(vec![pci_device]);
+                    }
+                    Entry::Occupied(mut occupied_entry) => {
+                        occupied_entry.get_mut().push(pci_device);
+                    }
+                }
             }
         }
     }

+ 12 - 0
src/kernel/syscall.rs

@@ -62,6 +62,18 @@ impl SyscallRetVal for usize {
     }
 }
 
+impl SyscallRetVal for isize {
+    fn into_retval(self) -> Option<usize> {
+        Some(self as usize)
+    }
+}
+
+impl SyscallRetVal for i32 {
+    fn into_retval(self) -> Option<usize> {
+        Some(self as usize)
+    }
+}
+
 impl SyscallRetVal for SyscallNoReturn {
     fn into_retval(self) -> Option<usize> {
         None

+ 75 - 6
src/kernel/syscall/file_rw.rs

@@ -1,9 +1,12 @@
+use core::time::Duration;
+
 use super::FromSyscallArg;
 use crate::io::IntoStream;
 use crate::kernel::constants::{
-    EBADF, EFAULT, EINVAL, ENOENT, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR,
+    EBADF, EFAULT, EINVAL, ENOENT, ENOSYS, ENOTDIR, SEEK_CUR, SEEK_END, SEEK_SET, S_IFBLK, S_IFCHR,
 };
 use crate::kernel::task::Thread;
+use crate::kernel::timer::sleep;
 use crate::kernel::vfs::filearray::FD;
 use crate::{
     io::{Buffer, BufferFill},
@@ -25,7 +28,8 @@ use eonix_runtime::task::Task;
 use posix_types::ctypes::{Long, PtrT};
 use posix_types::namei::RenameFlags;
 use posix_types::open::{AtFlags, OpenFlags};
-use posix_types::signal::SigSet;
+use posix_types::poll::FDSet;
+use posix_types::signal::{SigSet, Signal};
 use posix_types::stat::Stat;
 use posix_types::stat::{StatX, TimeSpec};
 use posix_types::syscall_no::*;
@@ -73,7 +77,20 @@ fn dentry_from(
 fn read(fd: FD, buffer: *mut u8, bufsize: usize) -> KResult<usize> {
     let mut buffer = UserBuffer::new(buffer, bufsize)?;
 
-    Task::block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer))
+    Task::block_on(thread.files.get(fd).ok_or(EBADF)?.read(&mut buffer, None))
+}
+
+#[eonix_macros::define_syscall(SYS_PREAD64)]
+fn pread64(fd: FD, buffer: *mut u8, bufsize: usize, offset: usize) -> KResult<usize> {
+    let mut buffer = UserBuffer::new(buffer, bufsize)?;
+
+    Task::block_on(
+        thread
+            .files
+            .get(fd)
+            .ok_or(EBADF)?
+            .read(&mut buffer, Some(offset)),
+    )
 }
 
 #[eonix_macros::define_syscall(SYS_WRITE)]
@@ -81,7 +98,21 @@ fn write(fd: FD, buffer: *const u8, count: usize) -> KResult<usize> {
     let buffer = CheckedUserPointer::new(buffer, count)?;
     let mut stream = buffer.into_stream();
 
-    Task::block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream))
+    Task::block_on(thread.files.get(fd).ok_or(EBADF)?.write(&mut stream, None))
+}
+
+#[eonix_macros::define_syscall(SYS_PWRITE64)]
+fn pwrite64(fd: FD, buffer: *const u8, count: usize, offset: usize) -> KResult<usize> {
+    let buffer = CheckedUserPointer::new(buffer, count)?;
+    let mut stream = buffer.into_stream();
+
+    Task::block_on(
+        thread
+            .files
+            .get(fd)
+            .ok_or(EBADF)?
+            .write(&mut stream, Some(offset)),
+    )
 }
 
 #[eonix_macros::define_syscall(SYS_OPENAT)]
@@ -229,6 +260,12 @@ fn mkdir(pathname: *const u8, mode: u32) -> KResult<()> {
     sys_mkdirat(thread, FD::AT_FDCWD, pathname, mode)
 }
 
+#[eonix_macros::define_syscall(SYS_FTRUNCATE64)]
+fn truncate64(fd: FD, length: usize) -> KResult<()> {
+    let file = thread.files.get(fd).ok_or(EBADF)?;
+    file.as_path().ok_or(EBADF)?.truncate(length)
+}
+
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_TRUNCATE)]
 fn truncate(pathname: *const u8, length: usize) -> KResult<()> {
@@ -353,7 +390,7 @@ fn readv(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult<usize> {
     let mut tot = 0usize;
     for mut buffer in iov_buffers.into_iter() {
         // TODO!!!: `readv`
-        let nread = Task::block_on(file.read(&mut buffer))?;
+        let nread = Task::block_on(file.read(&mut buffer, None))?;
         tot += nread;
 
         if nread != buffer.total() {
@@ -389,7 +426,7 @@ fn writev(fd: FD, iov_user: *const IoVec, iovcnt: u32) -> KResult<usize> {
 
     let mut tot = 0usize;
     for mut stream in iov_streams.into_iter() {
-        let nread = Task::block_on(file.write(&mut stream))?;
+        let nread = Task::block_on(file.write(&mut stream, None))?;
         tot += nread;
 
         if nread == 0 || !stream.is_drained() {
@@ -495,6 +532,38 @@ fn ppoll(
     do_poll(thread, fds, nfds, 0)
 }
 
+#[eonix_macros::define_syscall(SYS_PSELECT6)]
+fn pselect6(
+    nfds: u32,
+    _readfds: *mut FDSet,
+    _writefds: *mut FDSet,
+    _exceptfds: *mut FDSet,
+    timeout: *mut TimeSpec,
+    _sigmask: *const (),
+) -> KResult<usize> {
+    // According to [pselect6(2)](https://linux.die.net/man/2/pselect6):
+    // Some code calls select() with all three sets empty, nfds zero, and
+    // a non-NULL timeout as a fairly portable way to sleep with subsecond precision.
+    if nfds != 0 {
+        thread.raise(Signal::SIGSYS);
+        return Err(ENOSYS);
+    }
+
+    let timeout = UserPointerMut::new(timeout)?;
+
+    // Read here to check for invalid pointers.
+    let _timeout_value = timeout.read()?;
+
+    Task::block_on(sleep(Duration::from_millis(10)));
+
+    timeout.write(TimeSpec {
+        tv_sec: 0,
+        tv_nsec: 0,
+    })?;
+
+    Ok(0)
+}
+
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_POLL)]
 fn poll(fds: *mut UserPollFd, nfds: u32, timeout: u32) -> KResult<u32> {

+ 141 - 46
src/kernel/syscall/mm.rs

@@ -1,6 +1,9 @@
 use super::FromSyscallArg;
-use crate::kernel::constants::{EINVAL, ENOMEM};
+use crate::fs::shm::{gen_shm_id, ShmFlags, IPC_PRIVATE, SHM_MANAGER};
+use crate::kernel::constants::{EBADF, EEXIST, EINVAL, ENOENT, ENOMEM};
+use crate::kernel::mem::FileMapping;
 use crate::kernel::task::Thread;
+use crate::kernel::vfs::filearray::FD;
 use crate::{
     kernel::{
         constants::{UserMmapFlags, UserMmapProtocol},
@@ -28,6 +31,7 @@ impl FromSyscallArg for UserMmapFlags {
 
 /// Check whether we are doing an implemented function.
 /// If `condition` is false, return `Err(err)`.
+#[allow(unused)]
 fn check_impl(condition: bool, err: u32) -> KResult<()> {
     if !condition {
         Err(err)
@@ -42,76 +46,74 @@ fn do_mmap2(
     len: usize,
     prot: UserMmapProtocol,
     flags: UserMmapFlags,
-    fd: u32,
+    fd: FD,
     pgoffset: usize,
 ) -> KResult<usize> {
     let addr = VAddr::from(addr);
-    if !addr.is_page_aligned() || len == 0 {
+    if !addr.is_page_aligned() || pgoffset % PAGE_SIZE != 0 || len == 0 {
         return Err(EINVAL);
     }
 
     let len = len.align_up(PAGE_SIZE);
-    check_impl(flags.contains(UserMmapFlags::MAP_ANONYMOUS), ENOMEM)?;
-    check_impl(flags.contains(UserMmapFlags::MAP_PRIVATE), EINVAL)?;
-    if fd != u32::MAX || pgoffset != 0 {
-        return Err(EINVAL);
-    }
-
     let mm_list = &thread.process.mm_list;
+    let is_shared = flags.contains(UserMmapFlags::MAP_SHARED);
 
-    // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether
-    //          `addr` is above user reachable memory.
-    let addr = if flags.contains(UserMmapFlags::MAP_FIXED) {
-        if prot.is_empty() {
-            Task::block_on(mm_list.protect(
-                addr,
-                len,
-                Permission {
-                    read: prot.contains(UserMmapProtocol::PROT_READ),
-                    write: prot.contains(UserMmapProtocol::PROT_WRITE),
-                    execute: prot.contains(UserMmapProtocol::PROT_EXEC),
-                },
-            ))
-            .map(|_| addr)
+    let mapping = if flags.contains(UserMmapFlags::MAP_ANONYMOUS) {
+        if pgoffset != 0 {
+            return Err(EINVAL);
+        }
+
+        if !is_shared {
+            Mapping::Anonymous
         } else {
-            mm_list.mmap_fixed(
-                addr,
+            // The mode is unimportant here, since we are checking prot in mm_area.
+            let shared_area = Task::block_on(SHM_MANAGER.lock()).create_shared_area(
                 len,
-                Mapping::Anonymous,
-                Permission {
-                    read: prot.contains(UserMmapProtocol::PROT_READ),
-                    write: prot.contains(UserMmapProtocol::PROT_WRITE),
-                    execute: prot.contains(UserMmapProtocol::PROT_EXEC),
-                },
-            )
+                thread.process.pid,
+                0x777,
+            );
+            Mapping::File(FileMapping::new(shared_area.area.clone(), 0, len))
         }
     } else {
-        mm_list.mmap_hint(
-            addr,
-            len,
-            Mapping::Anonymous,
-            Permission {
-                read: prot.contains(UserMmapProtocol::PROT_READ),
-                write: prot.contains(UserMmapProtocol::PROT_WRITE),
-                execute: prot.contains(UserMmapProtocol::PROT_EXEC),
-            },
-        )
+        let file = thread
+            .files
+            .get(fd)
+            .ok_or(EBADF)?
+            .get_inode()?
+            .ok_or(EBADF)?;
+
+        Mapping::File(FileMapping::new(file, pgoffset, len))
+    };
+
+    let permission = Permission {
+        read: prot.contains(UserMmapProtocol::PROT_READ),
+        write: prot.contains(UserMmapProtocol::PROT_WRITE),
+        execute: prot.contains(UserMmapProtocol::PROT_EXEC),
+    };
+
+    // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether
+    //          `addr` is above user reachable memory.
+    let addr = if flags.contains(UserMmapFlags::MAP_FIXED) {
+        Task::block_on(mm_list.unmap(addr, len));
+        mm_list.mmap_fixed(addr, len, mapping, permission, is_shared)
+    } else {
+        mm_list.mmap_hint(addr, len, mapping, permission, is_shared)
     };
 
     addr.map(|addr| addr.addr())
 }
 
-#[cfg(target_arch = "riscv64")]
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 #[eonix_macros::define_syscall(SYS_MMAP)]
 fn mmap(
     addr: usize,
     len: usize,
     prot: UserMmapProtocol,
     flags: UserMmapFlags,
-    fd: u32,
+    fd: FD,
     offset: usize,
 ) -> KResult<usize> {
-    do_mmap2(thread, addr, len, prot, flags, fd, offset / PAGE_SIZE)
+    do_mmap2(thread, addr, len, prot, flags, fd, offset)
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -121,7 +123,7 @@ fn mmap2(
     len: usize,
     prot: UserMmapProtocol,
     flags: UserMmapFlags,
-    fd: u32,
+    fd: FD,
     pgoffset: usize,
 ) -> KResult<usize> {
     do_mmap2(thread, addr, len, prot, flags, fd, pgoffset)
@@ -169,6 +171,99 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> {
     ))
 }
 
+#[eonix_macros::define_syscall(SYS_SHMGET)]
+fn shmget(key: usize, size: usize, shmflg: u32) -> KResult<u32> {
+    let size = size.align_up(PAGE_SIZE);
+
+    let mut shm_manager = Task::block_on(SHM_MANAGER.lock());
+    let shmid = gen_shm_id(key)?;
+
+    let mode = shmflg & 0o777;
+    let shmflg = ShmFlags::from_bits_truncate(shmflg);
+
+    if key == IPC_PRIVATE {
+        let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode);
+        shm_manager.insert(shmid, new_shm);
+        return Ok(shmid);
+    }
+
+    if let Some(_) = shm_manager.get(shmid) {
+        if shmflg.contains(ShmFlags::IPC_CREAT | ShmFlags::IPC_EXCL) {
+            return Err(EEXIST);
+        }
+
+        return Ok(shmid);
+    }
+
+    if shmflg.contains(ShmFlags::IPC_CREAT) {
+        let new_shm = shm_manager.create_shared_area(size, thread.process.pid, mode);
+        shm_manager.insert(shmid, new_shm);
+        return Ok(shmid);
+    }
+
+    return Err(ENOENT);
+}
+
+#[eonix_macros::define_syscall(SYS_SHMAT)]
+fn shmat(shmid: u32, addr: usize, shmflg: u32) -> KResult<usize> {
+    let mm_list = &thread.process.mm_list;
+    let shm_manager = Task::block_on(SHM_MANAGER.lock());
+    let shm_area = shm_manager.get(shmid).ok_or(EINVAL)?;
+
+    let mode = shmflg & 0o777;
+    let shmflg = ShmFlags::from_bits_truncate(shmflg);
+
+    let mut permission = Permission {
+        read: true,
+        write: true,
+        execute: false,
+    };
+
+    if shmflg.contains(ShmFlags::SHM_EXEC) {
+        permission.execute = true;
+    }
+    if shmflg.contains(ShmFlags::SHM_RDONLY) {
+        permission.write = false;
+    }
+
+    let size = shm_area.shmid_ds.shm_segsz;
+
+    let mapping = Mapping::File(FileMapping {
+        file: shm_area.area.clone(),
+        offset: 0,
+        length: size,
+    });
+
+    let addr = if addr != 0 {
+        if addr % PAGE_SIZE != 0 && !shmflg.contains(ShmFlags::SHM_RND) {
+            return Err(EINVAL);
+        }
+        let addr = VAddr::from(addr.align_down(PAGE_SIZE));
+        mm_list.mmap_fixed(addr, size, mapping, permission, true)
+    } else {
+        mm_list.mmap_hint(VAddr::NULL, size, mapping, permission, true)
+    }?;
+
+    thread.process.shm_areas.lock().insert(addr, size);
+
+    Ok(addr.addr())
+}
+
+#[eonix_macros::define_syscall(SYS_SHMDT)]
+fn shmdt(addr: usize) -> KResult<usize> {
+    let addr = VAddr::from(addr);
+    let mut shm_areas = thread.process.shm_areas.lock();
+    let size = *shm_areas.get(&addr).ok_or(EINVAL)?;
+    shm_areas.remove(&addr);
+    drop(shm_areas);
+    return Task::block_on(thread.process.mm_list.unmap(addr, size)).map(|_| 0);
+}
+
+#[eonix_macros::define_syscall(SYS_SHMCTL)]
+fn shmctl(shmid: u32, op: i32, shmid_ds: usize) -> KResult<usize> {
+    Ok(0)
+}
+
 #[eonix_macros::define_syscall(SYS_MEMBARRIER)]
 fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> {
     Ok(())

+ 154 - 8
src/kernel/syscall/procops.rs

@@ -1,17 +1,19 @@
 use super::SyscallNoReturn;
 use crate::io::Buffer;
-use crate::kernel::constants::{EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH};
+use crate::kernel::constants::{
+    CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINVAL, ENOENT, ENOTDIR, ERANGE, ESRCH,
+};
 use crate::kernel::constants::{
     ENOSYS, PR_GET_NAME, PR_SET_NAME, RLIMIT_STACK, SIG_BLOCK, SIG_SETMASK, SIG_UNBLOCK,
 };
 use crate::kernel::mem::PageBuffer;
 use crate::kernel::task::{
-    do_clone, futex_wait, futex_wake, FutexFlags, FutexOp, ProcessList, ProgramLoader,
+    do_clone, futex_wait, futex_wake, yield_now, FutexFlags, FutexOp, ProcessList, ProgramLoader,
     RobustListHead, SignalAction, Thread, WaitId, WaitType,
 };
 use crate::kernel::task::{parse_futexop, CloneArgs};
 use crate::kernel::timer::sleep;
-use crate::kernel::user::dataflow::{CheckedUserPointer, UserString};
+use crate::kernel::user::dataflow::UserString;
 use crate::kernel::user::{UserPointer, UserPointerMut};
 use crate::kernel::vfs::{self, dentry::Dentry};
 use crate::path::Path;
@@ -66,6 +68,37 @@ fn nanosleep(req: *const (u32, u32), rem: *mut (u32, u32)) -> KResult<usize> {
     Ok(0)
 }
 
+#[eonix_macros::define_syscall(SYS_CLOCK_NANOSLEEP)]
+fn clock_nanosleep(
+    clock_id: u32,
+    flags: u32,
+    req: *const (u32, u32),
+    rem: *mut (u32, u32),
+) -> KResult<usize> {
+    if clock_id != CLOCK_REALTIME
+        && clock_id != CLOCK_REALTIME_COARSE
+        && clock_id != CLOCK_MONOTONIC
+    {
+        unimplemented!("Unsupported clock_id: {}", clock_id);
+    }
+
+    let req = UserPointer::new(req)?.read()?;
+    let rem = if rem.is_null() {
+        None
+    } else {
+        Some(UserPointerMut::new(rem)?)
+    };
+
+    let duration = Duration::from_secs(req.0 as u64) + Duration::from_nanos(req.1 as u64);
+    Task::block_on(sleep(duration));
+
+    if let Some(rem) = rem {
+        rem.write((0, 0))?;
+    }
+
+    Ok(0)
+}
+
 #[eonix_macros::define_syscall(SYS_UMASK)]
 fn umask(mask: u32) -> KResult<u32> {
     let mut umask = thread.fs_context.umask.lock();
@@ -106,9 +139,21 @@ fn chdir(path: *const u8) -> KResult<()> {
     Ok(())
 }
 
+#[eonix_macros::define_syscall(SYS_UMOUNT)]
+fn umount(source: *const u8) -> KResult<()> {
+    let source = UserString::new(source)?;
+    if source.as_cstr().to_str().unwrap() == "./mnt" {
+        return Ok(());
+    }
+    return Err(ENOENT);
+}
+
 #[eonix_macros::define_syscall(SYS_MOUNT)]
 fn mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usize) -> KResult<()> {
     let source = UserString::new(source)?;
+    if source.as_cstr().to_str().unwrap() == "/dev/vda2" {
+        return Ok(());
+    }
     let target = UserString::new(target)?;
     let fstype = UserString::new(fstype)?;
 
@@ -395,16 +440,54 @@ fn geteuid() -> KResult<u32> {
     do_geteuid(thread)
 }
 
-#[eonix_macros::define_syscall(SYS_GETGID)]
-fn getgid() -> KResult<u32> {
+#[eonix_macros::define_syscall(SYS_GETEGID)]
+fn getegid() -> KResult<u32> {
     // All users are root for now.
     Ok(0)
 }
 
+#[eonix_macros::define_syscall(SYS_GETGID)]
+fn getgid() -> KResult<u32> {
+    sys_getegid(thread)
+}
+
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_GETGID32)]
 fn getgid32() -> KResult<u32> {
-    sys_getgid(thread)
+    sys_getegid(thread)
+}
+
+#[eonix_macros::define_syscall(SYS_GETRANDOM)]
+fn getrandom(buf: *mut u8, buflen: usize, _flags: u32) -> isize {
+    if buf.is_null() || buflen == 0 {
+        return -14;
+    }
+
+    static mut SEED: u64 = 1;
+    unsafe {
+        for i in 0..buflen {
+            SEED = SEED.wrapping_mul(1103515245).wrapping_add(12345);
+            *buf.add(i) = (SEED >> 8) as u8;
+        }
+    }
+
+    buflen as isize
+}
+
+#[eonix_macros::define_syscall(SYS_SCHED_YIELD)]
+fn sched_yield() -> KResult<()> {
+    Task::block_on(yield_now());
+    Ok(())
+}
+
+#[eonix_macros::define_syscall(SYS_SYNC)]
+fn sync() -> KResult<()> {
+    Ok(())
+}
+
+#[eonix_macros::define_syscall(SYS_FSYNC)]
+fn fsync() -> KResult<()> {
+    Ok(())
 }
 
 #[eonix_macros::define_syscall(SYS_GETTID)]
@@ -440,7 +523,7 @@ pub fn parse_user_tls(arch_tls: usize) -> KResult<UserTLS> {
         Ok(new_tls)
     }
 
-    #[cfg(target_arch = "riscv64")]
+    #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
     {
         Ok(UserTLS::new(arch_tls as u64))
     }
@@ -523,6 +606,19 @@ fn tkill(tid: u32, sig: u32) -> KResult<()> {
     Ok(())
 }
 
+#[eonix_macros::define_syscall(SYS_TGKILL)]
+fn tgkill(tgid: u32, tid: u32, sig: u32) -> KResult<()> {
+    let procs = Task::block_on(ProcessList::get().read());
+
+    let thread_to_kill = procs.try_find_thread(tid).ok_or(ESRCH)?;
+    if thread_to_kill.process.pid != tgid {
+        return Err(ESRCH);
+    }
+
+    thread_to_kill.raise(Signal::try_from_raw(sig)?);
+    Ok(())
+}
+
 #[eonix_macros::define_syscall(SYS_RT_SIGPROCMASK)]
 fn rt_sigprocmask(
     how: u32,
@@ -555,6 +651,29 @@ fn rt_sigprocmask(
     Ok(())
 }
 
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct TimeSpec32 {
+    tv_sec: i32,
+    tv_nsec: i32,
+}
+
+impl TimeSpec32 {
+    fn to_duration(&self) -> Duration {
+        Duration::new(self.tv_sec as u64, self.tv_nsec as u32)
+    }
+}
+
+#[eonix_macros::define_syscall(SYS_RT_SIGTIMEDWAIT_TIME32)]
+fn rt_sigtimedwait_time32(
+    _uthese: *const SigSet,
+    _uinfo: *mut SigInfo,
+    _uts: *const TimeSpec32,
+) -> KResult<i32> {
+    // TODO
+    Ok(0)
+}
+
 #[eonix_macros::define_syscall(SYS_RT_SIGACTION)]
 fn rt_sigaction(
     signum: u32,
@@ -610,7 +729,12 @@ fn prlimit64(
             }
 
             if !new_limit.is_null() {
-                return Err(ENOSYS);
+                let new_rlimit = UserPointer::new(new_limit)?.read()?;
+                if new_rlimit.rlim_cur > new_rlimit.rlim_max {
+                    return Err(EINVAL);
+                }
+                // TODO:
+                // thread.process().set_rlimit(resource, new_rlimit)?;
             }
             Ok(())
         }
@@ -623,6 +747,11 @@ fn getrlimit(resource: u32, rlimit: *mut RLimit) -> KResult<()> {
     sys_prlimit64(thread, 0, resource, core::ptr::null(), rlimit)
 }
 
+#[eonix_macros::define_syscall(SYS_SETRLIMIT)]
+fn setrlimit(resource: u32, rlimit: *const RLimit) -> KResult<()> {
+    sys_prlimit64(thread, 0, resource, rlimit, core::ptr::null_mut())
+}
+
 #[repr(C)]
 #[derive(Clone, Copy)]
 struct RUsage {
@@ -689,6 +818,9 @@ fn fork() -> KResult<u32> {
     do_clone(thread, clone_args)
 }
 
+// Some old platforms including x86_32, riscv and arm have the last two arguments
+// swapped, so we need to define two versions of `clone` syscall.
+#[cfg(not(target_arch = "loongarch64"))]
 #[eonix_macros::define_syscall(SYS_CLONE)]
 fn clone(
     clone_flags: usize,
@@ -702,6 +834,20 @@ fn clone(
     do_clone(thread, clone_args)
 }
 
+#[cfg(target_arch = "loongarch64")]
+#[eonix_macros::define_syscall(SYS_CLONE)]
+fn clone(
+    clone_flags: usize,
+    new_sp: usize,
+    parent_tidptr: usize,
+    child_tidptr: usize,
+    tls: usize,
+) -> KResult<u32> {
+    let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?;
+
+    do_clone(thread, clone_args)
+}
+
 #[eonix_macros::define_syscall(SYS_FUTEX)]
 fn futex(
     uaddr: usize,

+ 42 - 12
src/kernel/syscall/sysinfo.rs

@@ -1,9 +1,10 @@
 use crate::{
+    io::Buffer as _,
     kernel::{
-        constants::{CLOCK_MONOTONIC, CLOCK_REALTIME, EINVAL},
+        constants::{CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, EINTR, EINVAL},
         task::Thread,
         timer::{Instant, Ticks},
-        user::UserPointerMut,
+        user::{UserBuffer, UserPointerMut},
     },
     prelude::*,
 };
@@ -52,6 +53,9 @@ fn newuname(buffer: *mut NewUTSName) -> KResult<()> {
     #[cfg(target_arch = "riscv64")]
     copy_cstr_to_array(b"riscv64", &mut uname.machine);
 
+    #[cfg(target_arch = "loongarch64")]
+    copy_cstr_to_array(b"loongarch64", &mut uname.machine);
+
     copy_cstr_to_array(b"(none)", &mut uname.domainname);
 
     buffer.write(uname)
@@ -78,18 +82,26 @@ fn gettimeofday(timeval: *mut TimeVal, timezone: *mut ()) -> KResult<()> {
 }
 
 fn do_clock_gettime64(_thread: &Thread, clock_id: u32, timespec: *mut TimeSpec) -> KResult<()> {
-    if clock_id != CLOCK_REALTIME && clock_id != CLOCK_MONOTONIC {
-        unimplemented!("Unsupported clock_id: {}", clock_id);
-    }
-
     let timespec = UserPointerMut::new(timespec)?;
-    let now = Instant::now();
-    let since_epoch = now.since_epoch();
 
-    timespec.write(TimeSpec {
-        tv_sec: since_epoch.as_secs(),
-        tv_nsec: since_epoch.subsec_nanos(),
-    })
+    match clock_id {
+        CLOCK_REALTIME | CLOCK_REALTIME_COARSE => {
+            let now = Instant::now();
+            let since_epoch = now.since_epoch();
+            timespec.write(TimeSpec {
+                tv_sec: since_epoch.as_secs(),
+                tv_nsec: since_epoch.subsec_nanos(),
+            })
+        }
+        CLOCK_MONOTONIC => {
+            let uptime_secs = Ticks::since_boot().as_secs();
+            timespec.write(TimeSpec {
+                tv_sec: uptime_secs,
+                tv_nsec: 0,
+            })
+        }
+        clock_id => unimplemented!("Unsupported clock_id: {}", clock_id),
+    }
 }
 
 #[cfg(not(target_arch = "x86_64"))]
@@ -162,4 +174,22 @@ fn times(tms: *mut TMS) -> KResult<()> {
     })
 }
 
+#[eonix_macros::define_syscall(SYS_GETRANDOM)]
+fn get_random(buf: *mut u8, len: usize, flags: u32) -> KResult<usize> {
+    if flags != 0 {
+        return Err(EINVAL);
+    }
+
+    let mut buffer = UserBuffer::new(buf, len)?;
+    for i in (0u8..=255).cycle().step_by(53) {
+        let _ = buffer.fill(&[i])?;
+
+        if Thread::current().signal_list.has_pending_signal() {
+            return Err(EINTR);
+        }
+    }
+
+    Ok(len)
+}
+
 pub fn keep_alive() {}

+ 1 - 1
src/kernel/task.rs

@@ -18,4 +18,4 @@ pub use process_group::ProcessGroup;
 pub use process_list::ProcessList;
 pub use session::Session;
 pub use signal::SignalAction;
-pub use thread::{new_thread_runnable, Thread, ThreadBuilder};
+pub use thread::{new_thread_runnable, yield_now, Thread, ThreadBuilder};

+ 4 - 1
src/kernel/task/loader/elf.rs

@@ -274,6 +274,7 @@ impl<E: ElfArch> Elf<E> {
                 write: true,
                 execute: false,
             },
+            false,
         )?;
 
         StackInitializer::new(&mm_list, E::STACK_BASE_ADDR, args, envs, aux_vec).init()
@@ -356,11 +357,12 @@ impl<E: ElfArch> Elf<E> {
                 vmap_start,
                 file_len,
                 Mapping::File(FileMapping::new(
-                    self.file.clone(),
+                    self.file.get_inode()?,
                     file_offset,
                     real_file_length,
                 )),
                 permission,
+                false,
             )?;
         }
 
@@ -370,6 +372,7 @@ impl<E: ElfArch> Elf<E> {
                 vmem_len - file_len,
                 Mapping::Anonymous,
                 permission,
+                false,
             )?;
         }
 

+ 4 - 0
src/kernel/task/process.rs

@@ -15,6 +15,7 @@ use alloc::{
     sync::{Arc, Weak},
 };
 use core::sync::atomic::{AtomicU32, Ordering};
+use eonix_mm::address::VAddr;
 use eonix_runtime::task::Task;
 use eonix_sync::{
     AsProof as _, AsProofMut as _, Locked, Proof, ProofMut, RwLockReadGuard, SpinGuard,
@@ -50,6 +51,8 @@ pub struct Process {
 
     pub exit_signal: Option<Signal>,
 
+    pub shm_areas: Spin<BTreeMap<VAddr, usize>>,
+
     /// Parent process
     ///
     /// `parent` must be valid during the whole life of the process.
@@ -258,6 +261,7 @@ impl ProcessBuilder {
             pid: self.pid.expect("should set pid before building"),
             wait_list: WaitList::new(),
             mm_list,
+            shm_areas: Spin::new(BTreeMap::new()),
             exit_signal: self.exit_signal,
             parent: RCUPointer::empty(),
             pgroup: RCUPointer::empty(),

+ 10 - 3
src/kernel/task/signal.rs

@@ -263,7 +263,11 @@ impl SignalList {
         fpu_state: &mut FpuState,
         old_sigreturn: bool,
     ) -> KResult<()> {
-        #[cfg(not(any(target_arch = "x86_64", target_arch = "riscv64")))]
+        #[cfg(not(any(
+            target_arch = "x86_64",
+            target_arch = "riscv64",
+            target_arch = "loongarch64"
+        )))]
         compile_error!("`restore` is not implemented for this architecture");
 
         #[cfg(target_arch = "x86_64")]
@@ -280,9 +284,12 @@ impl SignalList {
             old_trap_ctx_vaddr
         };
 
-        #[cfg(target_arch = "riscv64")]
+        #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
         let old_trap_ctx_vaddr = {
-            debug_assert!(!old_sigreturn, "Old sigreturn is not supported on RISC-V");
+            debug_assert!(
+                !old_sigreturn,
+                "Old sigreturn is not supported on RISC-V and LoongArch64"
+            );
             trap_ctx.get_stack_pointer()
         };
 

+ 18 - 3
src/kernel/task/signal/signal_action.rs

@@ -33,7 +33,11 @@ unsafe extern "C" fn vdso_sigreturn() {
 #[unsafe(naked)]
 #[unsafe(link_section = ".vdso.rt_sigreturn")]
 unsafe extern "C" fn vdso_rt_sigreturn() {
-    #[cfg(not(any(target_arch = "x86_64", target_arch = "riscv64")))]
+    #[cfg(not(any(
+        target_arch = "x86_64",
+        target_arch = "riscv64",
+        target_arch = "loongarch64"
+    )))]
     compile_error!("rt_sigreturn is not implemented for this architecture");
 
     #[cfg(target_arch = "riscv64")]
@@ -43,6 +47,13 @@ unsafe extern "C" fn vdso_rt_sigreturn() {
         sys_rt_sigreturn = const posix_types::syscall_no::SYS_RT_SIGRETURN,
     );
 
+    #[cfg(target_arch = "loongarch64")]
+    naked_asm!(
+        "li.d $a7, {sys_rt_sigreturn}",
+        "syscall 0",
+        sys_rt_sigreturn = const posix_types::syscall_no::SYS_RT_SIGRETURN,
+    );
+
     #[cfg(target_arch = "x86_64")]
     naked_asm!(
         "mov ${sys_rt_sigreturn}, %eax",
@@ -150,7 +161,11 @@ impl SignalAction {
         let return_address = if let Some(restorer) = restorer {
             restorer.addr().addr()
         } else {
-            #[cfg(not(any(target_arch = "x86_64", target_arch = "riscv64")))]
+            #[cfg(not(any(
+                target_arch = "x86_64",
+                target_arch = "riscv64",
+                target_arch = "loongarch64"
+            )))]
             compile_error!("`vdso_sigreturn` is not implemented for this architecture");
 
             #[cfg(target_arch = "x86_64")]
@@ -166,7 +181,7 @@ impl SignalAction {
                 }
             }
 
-            #[cfg(target_arch = "riscv64")]
+            #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
             {
                 static VDSO_RT_SIGRETURN_ADDR: &'static unsafe extern "C" fn() =
                     &(vdso_rt_sigreturn as unsafe extern "C" fn());

+ 3 - 3
src/kernel/task/thread.rs

@@ -185,7 +185,7 @@ impl ThreadBuilder {
         let mut trap_ctx = thread.trap_ctx.borrow().clone();
         trap_ctx.set_user_return_value(0);
 
-        #[cfg(target_arch = "riscv64")]
+        #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
         {
             let pc = trap_ctx.get_program_counter();
             trap_ctx.set_program_counter(pc + 4);
@@ -410,7 +410,7 @@ impl Thread {
                         let mut trap_ctx = self.trap_ctx.borrow();
                         trap_ctx.set_user_return_value(retval);
 
-                        #[cfg(target_arch = "riscv64")]
+                        #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
                         {
                             let pc = trap_ctx.get_program_counter();
                             trap_ctx.set_program_counter(pc + 4);
@@ -446,7 +446,7 @@ impl Thread {
     }
 }
 
-async fn yield_now() {
+pub async fn yield_now() {
     struct Yield {
         yielded: bool,
     }

+ 1 - 1
src/kernel/timer.rs

@@ -18,7 +18,7 @@ static SLEEPERS_LIST: Spin<BinaryHeap<Reverse<Sleepers>>> = Spin::new(BinaryHeap
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
 pub struct Ticks(usize);
 
-#[derive(Default, Clone, Copy)]
+#[derive(Debug, Default, Clone, Copy)]
 pub struct Instant {
     secs_since_epoch: u64,
     nsecs_within: u32,

+ 87 - 0
src/kernel/user/dataflow.rs

@@ -152,6 +152,29 @@ impl CheckedUserPointer<'_> {
                 inout("a2") buffer => _,
                 out("t0") _,
             );
+
+            #[cfg(target_arch = "loongarch64")]
+            asm!(
+                "2:",
+                "ld.bu  $t0, $a1,  0",
+                "st.b   $t0, $a2,  0",
+                "addi.d $a1, $a1,  1",
+                "addi.d $a2, $a2,  1",
+                "addi.d $a0, $a0, -1",
+                "bnez   $a0, 2b",
+                "3:",
+                "nop",
+                ".pushsection .fix, \"a\", @progbits",
+                ".8byte 2b",      // instruction address
+                ".8byte 3b - 2b", // instruction length
+                ".8byte 3b",      // fix jump address
+                ".8byte 0x3",     // type: load
+                ".popsection",
+                inout("$a0") total => error_bytes,
+                inout("$a1") self.ptr => _,
+                inout("$a2") buffer => _,
+                out("$t0") _,
+            );
         }
 
         if error_bytes != 0 {
@@ -212,6 +235,29 @@ impl CheckedUserPointer<'_> {
                 inout("a2") self.ptr => _,
                 out("t0") _,
             );
+
+            #[cfg(target_arch = "loongarch64")]
+            asm!(
+                "2:",
+                "ld.bu  $t0, $a1,  0",
+                "st.b   $t0, $a2,  0",
+                "addi.d $a1, $a1,  1",
+                "addi.d $a2, $a2,  1",
+                "addi.d $a0, $a0, -1",
+                "bnez   $a0, 2b",
+                "3:",
+                "nop",
+                ".pushsection .fix, \"a\", @progbits",
+                ".8byte 2b",  // instruction address
+                ".8byte 3b - 2b",  // instruction length
+                ".8byte 3b",  // fix jump address
+                ".8byte 0x1", // type: store
+                ".popsection",
+                inout("$a0") total => error_bytes,
+                inout("$a1") data => _,
+                inout("$a2") self.ptr => _,
+                out("$t0") _,
+            );
         };
 
         if error_bytes != 0 {
@@ -269,6 +315,25 @@ impl CheckedUserPointer<'_> {
                 inout("a0") self.len => error_bytes,
                 inout("a1") self.ptr => _,
             );
+
+            #[cfg(target_arch = "loongarch64")]
+            asm!(
+                "2:",
+                "st.b   $zero, $a1,  0",
+                "addi.d $a1,   $a1,  1",
+                "addi.d $a0,   $a0, -1",
+                "bnez $a0,   2b",
+                "3:",
+                "nop",
+                ".pushsection .fix, \"a\", @progbits",
+                ".8byte 2b",  // instruction address
+                ".8byte 3b - 2b",  // instruction length
+                ".8byte 3b",  // fix jump address
+                ".8byte 0x1", // type: store
+                ".popsection",
+                inout("$a0") self.len => error_bytes,
+                inout("$a1") self.ptr => _,
+            );
         };
 
         if error_bytes != 0 {
@@ -376,6 +441,28 @@ impl<'lt> UserString<'lt> {
                 inout("a0") MAX_LEN => result,
                 inout("a1") ptr.ptr => _,
             );
+
+            #[cfg(target_arch = "loongarch64")]
+            asm!(
+                "2:",
+                "ld.bu  $t0, $a1,  0",
+                "4:",
+                "beqz   $t0, 3f",
+                "addi.d $a1, $a1,  1",
+                "addi.d $a0, $a0, -1",
+                "bnez   $a0, 2b",
+                "3:",
+                "nop",
+                ".pushsection .fix, \"a\", @progbits",
+                ".8byte 2b",  // instruction address
+                ".8byte 4b - 2b",  // instruction length
+                ".8byte 3b",  // fix jump address
+                ".8byte 0x2", // type: string
+                ".popsection",
+                out("$t0") _,
+                inout("$a0") MAX_LEN => result,
+                inout("$a1") ptr.ptr => _,
+            );
         };
 
         if result == 0 {

+ 39 - 13
src/kernel/vfs/file.rs

@@ -11,6 +11,7 @@ use crate::{
         task::Thread,
         terminal::{Terminal, TerminalIORequest},
         user::{UserPointer, UserPointerMut},
+        vfs::inode::Inode,
         CharDevice,
     },
     prelude::*,
@@ -86,6 +87,15 @@ pub struct File {
     file_type: FileType,
 }
 
+impl File {
+    pub fn get_inode(&self) -> KResult<Option<Arc<dyn Inode>>> {
+        match &self.file_type {
+            FileType::Inode(inode_file) => Ok(Some(inode_file.dentry.get_inode()?)),
+            _ => Ok(None),
+        }
+    }
+}
+
 pub enum SeekOption {
     Set(usize),
     Current(isize),
@@ -324,7 +334,7 @@ impl InodeFile {
         Ok(new_cursor)
     }
 
-    fn write(&self, stream: &mut dyn Stream) -> KResult<usize> {
+    fn write(&self, stream: &mut dyn Stream, offset: Option<usize>) -> KResult<usize> {
         if !self.write {
             return Err(EBADF);
         }
@@ -336,23 +346,35 @@ impl InodeFile {
 
             Ok(nwrote)
         } else {
-            let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?;
+            let nwrote = if let Some(offset) = offset {
+                self.dentry.write(stream, WriteOffset::Position(offset))?
+            } else {
+                let nwrote = self.dentry.write(stream, WriteOffset::Position(*cursor))?;
+                *cursor += nwrote;
+                nwrote
+            };
 
-            *cursor += nwrote;
             Ok(nwrote)
         }
     }
 
-    fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+    fn read(&self, buffer: &mut dyn Buffer, offset: Option<usize>) -> KResult<usize> {
         if !self.read {
             return Err(EBADF);
         }
 
-        let mut cursor = Task::block_on(self.cursor.lock());
+        let nread = if let Some(offset) = offset {
+            let nread = self.dentry.read(buffer, offset)?;
+            nread
+        } else {
+            let mut cursor = Task::block_on(self.cursor.lock());
 
-        let nread = self.dentry.read(buffer, *cursor)?;
+            let nread = self.dentry.read(buffer, *cursor)?;
+
+            *cursor += nread;
+            nread
+        };
 
-        *cursor += nread;
         Ok(nread)
     }
 
@@ -456,9 +478,9 @@ impl TerminalFile {
 }
 
 impl FileType {
-    pub async fn read(&self, buffer: &mut dyn Buffer) -> KResult<usize> {
+    pub async fn read(&self, buffer: &mut dyn Buffer, offset: Option<usize>) -> KResult<usize> {
         match self {
-            FileType::Inode(inode) => inode.read(buffer),
+            FileType::Inode(inode) => inode.read(buffer, offset),
             FileType::PipeRead(pipe) => pipe.pipe.read(buffer).await,
             FileType::TTY(tty) => tty.read(buffer).await,
             FileType::CharDev(device) => device.read(buffer),
@@ -481,9 +503,9 @@ impl FileType {
     //     }
     // }
 
-    pub async fn write(&self, stream: &mut dyn Stream) -> KResult<usize> {
+    pub async fn write(&self, stream: &mut dyn Stream, offset: Option<usize>) -> KResult<usize> {
         match self {
-            FileType::Inode(inode) => inode.write(stream),
+            FileType::Inode(inode) => inode.write(stream, offset),
             FileType::PipeWrite(pipe) => pipe.pipe.write(stream).await,
             FileType::TTY(tty) => tty.write(stream),
             FileType::CharDev(device) => device.write(stream),
@@ -527,12 +549,16 @@ impl FileType {
             if Thread::current().signal_list.has_pending_signal() {
                 return if cur == 0 { Err(EINTR) } else { Ok(cur) };
             }
-            let nread = self.read(&mut ByteBuffer::new(&mut buffer[..len])).await?;
+            let nread = self
+                .read(&mut ByteBuffer::new(&mut buffer[..len]), None)
+                .await?;
             if nread == 0 {
                 break;
             }
 
-            let nwrote = dest_file.write(&mut buffer[..nread].into_stream()).await?;
+            let nwrote = dest_file
+                .write(&mut buffer[..nread].into_stream(), None)
+                .await?;
             nsent += nwrote;
 
             if nwrote != len {

+ 18 - 4
src/kernel/vfs/inode.rs

@@ -4,6 +4,7 @@ use crate::kernel::constants::{
     EINVAL, EISDIR, ENOTDIR, EPERM, STATX_ATIME, STATX_BLOCKS, STATX_CTIME, STATX_GID, STATX_INO,
     STATX_MODE, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_TYPE, STATX_UID, S_IFDIR, S_IFMT,
 };
+use crate::kernel::mem::PageCache;
 use crate::kernel::timer::Instant;
 use crate::{io::Buffer, prelude::*};
 use alloc::sync::{Arc, Weak};
@@ -34,6 +35,7 @@ pub type AtomicGid = AtomicU32;
 pub type Mode = u32;
 pub type AtomicMode = AtomicU32;
 
+#[derive(Debug)]
 pub struct InodeData {
     pub ino: Ino,
     pub size: AtomicISize,
@@ -53,13 +55,13 @@ pub struct InodeData {
 }
 
 impl InodeData {
-    pub const fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
+    pub fn new(ino: Ino, vfs: Weak<dyn Vfs>) -> Self {
         Self {
             ino,
             vfs,
-            atime: Spin::new(Instant::default()),
-            ctime: Spin::new(Instant::default()),
-            mtime: Spin::new(Instant::default()),
+            atime: Spin::new(Instant::now()),
+            ctime: Spin::new(Instant::now()),
+            mtime: Spin::new(Instant::now()),
             rwsem: RwLock::new(()),
             size: AtomicU64::new(0),
             nlink: AtomicNlink::new(0),
@@ -126,10 +128,18 @@ pub trait Inode: Send + Sync + InodeInner + Any {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
+    fn read_direct(&self, buffer: &mut dyn Buffer, offset: usize) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
+    }
+
     fn write(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
 
+    fn write_direct(&self, stream: &mut dyn Stream, offset: WriteOffset) -> KResult<usize> {
+        Err(if self.is_dir() { EISDIR } else { EINVAL })
+    }
+
     fn devid(&self) -> KResult<DevId> {
         Err(if self.is_dir() { EISDIR } else { EINVAL })
     }
@@ -162,6 +172,10 @@ pub trait Inode: Send + Sync + InodeInner + Any {
         Err(EPERM)
     }
 
+    fn page_cache(&self) -> Option<&PageCache> {
+        None
+    }
+
     fn statx(&self, stat: &mut StatX, mask: u32) -> KResult<()> {
         // Safety: ffi should have checked reference
         let vfs = self.vfs.upgrade().expect("Vfs is dropped");

+ 3 - 1
src/kernel_init.rs

@@ -33,7 +33,9 @@ pub fn setup_memory(data: &mut BootStrapData) {
         let attr = PageAttribute::PRESENT
             | PageAttribute::WRITE
             | PageAttribute::READ
-            | PageAttribute::GLOBAL;
+            | PageAttribute::GLOBAL
+            | PageAttribute::ACCESSED
+            | PageAttribute::DIRTY;
 
         let page = GenericPage::alloc_in(&alloc);
         pte.set(page.into_raw(), attr.into());

+ 28 - 4
src/lib.rs

@@ -26,7 +26,10 @@ use core::{
     hint::spin_loop,
     sync::atomic::{AtomicBool, Ordering},
 };
-use eonix_hal::{processor::CPU, traits::trap::IrqState, trap::disable_irqs_save};
+use eonix_hal::{
+    arch_exported::bootstrap::shutdown, processor::CPU, traits::trap::IrqState,
+    trap::disable_irqs_save,
+};
 use eonix_mm::address::PRange;
 use eonix_runtime::{run::FutureRun, scheduler::Scheduler, task::Task};
 use kernel::{
@@ -45,6 +48,19 @@ use kernel_init::setup_memory;
 use path::Path;
 use prelude::*;
 
+#[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
+fn do_panic() -> ! {
+    shutdown();
+}
+
+#[cfg(not(any(target_arch = "riscv64", target_arch = "loongarch64")))]
+fn do_panic() -> ! {
+    // Spin forever.
+    loop {
+        spin_loop();
+    }
+}
+
 #[panic_handler]
 fn panic(info: &core::panic::PanicInfo) -> ! {
     if let Some(location) = info.location() {
@@ -60,7 +76,7 @@ fn panic(info: &core::panic::PanicInfo) -> ! {
     println_fatal!();
     println_fatal!("{}", info.message());
 
-    loop {}
+    do_panic()
 }
 
 static BSP_OK: AtomicBool = AtomicBool::new(false);
@@ -74,8 +90,6 @@ fn kernel_init(mut data: eonix_hal::bootstrap::BootStrapData) -> ! {
         driver::sbi_console::init_console();
     }
 
-    kernel::pcie::init_pcie().expect("Unable to initialize PCIe bus");
-
     // To satisfy the `Scheduler` "preempt count == 0" assertion.
     eonix_preempt::disable();
 
@@ -123,6 +137,8 @@ async fn init_process(early_kstack: PRange) {
         irq_ctx.restore();
     }
 
+    kernel::pcie::init_pcie().expect("Unable to initialize PCIe bus");
+
     CharDevice::init().unwrap();
 
     #[cfg(target_arch = "x86_64")]
@@ -142,6 +158,14 @@ async fn init_process(early_kstack: PRange) {
         driver::goldfish_rtc::probe();
     }
 
+    #[cfg(target_arch = "loongarch64")]
+    {
+        driver::serial::init().unwrap();
+        driver::virtio::init_virtio_devices();
+        driver::e1000e::register_e1000e_driver();
+        driver::ahci::register_ahci_driver();
+    }
+
     fs::tmpfs::init();
     fs::procfs::init();
     fs::fat32::init();

BIN
user-programs/busybox.la64


+ 97 - 0
user-programs/init_script_loongarch64.sh

@@ -0,0 +1,97 @@
+#!/mnt/busybox sh
+
+BUSYBOX=/mnt/busybox
+
+freeze() {
+    echo "an error occurred while executing '''$@''', freezing..." >&2
+
+    while true; do
+        true
+    done
+}
+
+do_or_freeze() {
+    if $@; then
+        return
+    fi
+
+    freeze $@
+}
+
+do_or_freeze $BUSYBOX mkdir -p /dev
+
+do_or_freeze $BUSYBOX mknod -m 666 /dev/console c 5 1
+do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3
+do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5
+do_or_freeze $BUSYBOX mknod -m 666 /dev/sda b 8 0
+do_or_freeze $BUSYBOX mknod -m 666 /dev/sda1 b 8 1
+do_or_freeze $BUSYBOX mknod -m 666 /dev/sdb b 8 16
+do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64
+do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65
+
+echo -n -e "deploying busybox... " >&2
+
+do_or_freeze $BUSYBOX mkdir -p /bin
+do_or_freeze $BUSYBOX --install -s /bin
+do_or_freeze $BUSYBOX mkdir -p /lib
+
+export PATH="/bin"
+
+echo ok >&2
+
+do_or_freeze mkdir -p /etc /root /proc
+do_or_freeze mount -t procfs proc proc
+
+# Check if the device /dev/sdb is available and can be read
+if dd if=/dev/sdb of=/dev/null bs=512 count=1; then
+    echo -n -e "Mounting the ext4 image... " >&2
+    do_or_freeze mkdir -p /mnt1
+    do_or_freeze mount -t ext4 /dev/sdb /mnt1
+    echo ok >&2
+fi
+
+cp /mnt/ld-musl-i386.so.1 /lib/ld-musl-i386.so.1
+ln -s /lib/ld-musl-i386.so.1 /lib/libc.so
+
+cat > /etc/passwd <<EOF
+root:x:0:0:root:/root:/mnt/busybox sh
+EOF
+
+cat > /etc/group <<EOF
+root:x:0:root
+EOF
+
+cat > /etc/profile <<EOF
+export PATH=/bin
+EOF
+
+cat > /root/.profile <<EOF
+export HOME=/root
+
+alias ll="ls -l "
+alias la="ls -la "
+EOF
+
+cat > /root/test.c <<EOF
+#include <stdio.h>
+
+int main() {
+    int var = 0;
+    printf("Hello, world!\n");
+    printf("Please input a number: \n");
+    scanf("%d", &var);
+    if (var > 0) {
+        printf("You typed a positive number.\n");
+    } else if (var == 0 ) {
+        printf("You input a zero.\n");
+    } else {
+        printf("You typed a negative number.\n");
+    }
+    return 0;
+}
+EOF
+
+exec $BUSYBOX sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0
+
+# We don't have a working init yet, so we use busybox sh directly for now.
+# exec /mnt/init /bin/sh -c 'exec sh -l < /dev/ttyS0 > /dev/ttyS0 2> /dev/ttyS0'

+ 6 - 6
user-programs/init_script_riscv64.sh

@@ -23,9 +23,9 @@ do_or_freeze $BUSYBOX mkdir -p /dev
 do_or_freeze $BUSYBOX mknod -m 666 /dev/console c 5 1
 do_or_freeze $BUSYBOX mknod -m 666 /dev/null c 1 3
 do_or_freeze $BUSYBOX mknod -m 666 /dev/zero c 1 5
-do_or_freeze $BUSYBOX mknod -m 666 /dev/sda b 8 0
-do_or_freeze $BUSYBOX mknod -m 666 /dev/sda1 b 8 1
-do_or_freeze $BUSYBOX mknod -m 666 /dev/sdb b 8 16
+do_or_freeze $BUSYBOX mknod -m 666 /dev/vda b 8 0
+do_or_freeze $BUSYBOX mknod -m 666 /dev/vda1 b 8 1
+do_or_freeze $BUSYBOX mknod -m 666 /dev/vdb b 8 16
 do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS0 c 4 64
 do_or_freeze $BUSYBOX mknod -m 666 /dev/ttyS1 c 4 65
 
@@ -42,11 +42,11 @@ echo ok >&2
 do_or_freeze mkdir -p /etc /root /proc
 do_or_freeze mount -t procfs proc proc
 
-# Check if the device /dev/sdb is available and can be read
-if dd if=/dev/sdb of=/dev/null bs=512 count=1; then
+# Check if the device /dev/vdb is available and can be read
+if dd if=/dev/vdb of=/dev/null bs=512 count=1; then
     echo -n -e "Mounting the ext4 image... " >&2
     do_or_freeze mkdir -p /mnt1
-    do_or_freeze mount -t ext4 /dev/sdb /mnt1
+    do_or_freeze mount -t ext4 /dev/vdb /mnt1
     echo ok >&2
 fi