Merge branch 'sched-rewrite' into shiai-master

greatbridf 10 months ago
parent
commit
ba9a93014f

+ 24 - 0
Cargo.lock

@@ -19,6 +19,10 @@ dependencies = [
  "percpu-macros",
 ]
 
+[[package]]
+name = "atomic_unique_refcell"
+version = "0.1.0"
+
 [[package]]
 name = "autocfg"
 version = "1.4.0"
@@ -88,8 +92,10 @@ name = "gbos-rust-part"
 version = "0.1.0"
 dependencies = [
  "arch",
+ "atomic_unique_refcell",
  "bindgen",
  "bitflags",
+ "intrusive-collections",
  "itertools",
  "lazy_static",
  "spin",
@@ -101,6 +107,15 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
+[[package]]
+name = "intrusive-collections"
+version = "0.9.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "189d0897e4cbe8c75efedf3502c18c887b05046e59d28404d4d8e46cbc4d1e86"
+dependencies = [
+ "memoffset",
+]
+
 [[package]]
 name = "itertools"
 version = "0.13.0"
@@ -157,6 +172,15 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"

+ 16 - 6
Cargo.toml

@@ -8,14 +8,28 @@ crate-type = ["staticlib"]
 
 [dependencies]
 arch = { path = "./arch" }
+atomic_unique_refcell = { path = "./crates/atomic_unique_refcell", features = [
+    "no_std",
+] }
 bitflags = "2.6.0"
+intrusive-collections = "0.9.7"
 itertools = { version = "0.13.0", default-features = false }
 lazy_static = { version = "1.5.0", features = ["spin_no_std"] }
 spin = "0.9.8"
 
 [features]
-default = ["smp"]
-debug_syscall = []
+default = ["smp", "trace_future"]
+trace_condvar = []
+trace_syscall = []
+trace_scheduler = []
+trace_future = []
+log_trace = [
+    "trace_condvar",
+    "trace_syscall",
+    "trace_scheduler",
+    "trace_future",
+]
+log_debug = []
 smp = []
 
 [build-dependencies]
@@ -24,10 +38,6 @@ bindgen = "0.70.1"
 [profile.dev]
 panic = "abort"
 
-[profile.dev.package.core]
-opt-level = 2
-debug = true
-
 [profile.dev.package."*"]
 opt-level = 2
 debug = false

+ 1 - 1
Makefile.src

@@ -2,7 +2,7 @@
 QEMU_BIN=##PLACEHOLDER_1##
 GDB_BIN=##PLACEHOLDER_2##
 QEMU_ACCELERATION_FLAG=##PLACEHOLDER_3##
-QEMU_DEBUG_FLAG=#-d cpu_reset,int
+QEMU_DEBUG_FLAG=#-d cpu_reset,int -D build/qemu.log
 QEMU_ARGS=-machine q35 -drive id=disk,file=build/boot.img,format=raw,if=none \
 	-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0 -smp 4 \
 	-no-reboot -no-shutdown $(QEMU_ACCELERATION_FLAG) $(QEMU_DEBUG_FLAG)

+ 131 - 52
arch/src/x86_64/context.rs

@@ -1,71 +1,150 @@
 use core::arch::naked_asm;
 
+/// The hardware state of a task that must be saved and restored across a context switch
 #[repr(C)]
 #[derive(Debug, Default)]
-struct ContextSwitchFrame {
-    r15: u64,
-    r14: u64,
-    r13: u64,
+pub struct TaskContext {
     r12: u64,
+    r13: u64,
+    r14: u64,
+    r15: u64,
     rbx: u64,
     rbp: u64,
-    eflags: u64,
-    rip: u64,
-}
-
-/// Necessary hardware states of task for context switch
-pub struct TaskContext {
-    /// The kernel stack pointer
-    pub rsp: u64,
-    // Extended states, i.e., FP/SIMD states to do!
+    rsp: u64,
+    rip: u64,    // Should we save rip here?
+    rflags: u64, // Should we save rflags here?
 }
 
 impl TaskContext {
+    /// Create a new, empty task context.
+    /// The entry point (the function to be called when the task is scheduled)
+    /// is set later via `ip()`, and the stack pointer (the address of the top
+    /// of the stack) via `sp()`. The stack pointer should be aligned to 16 bytes.
     pub const fn new() -> Self {
-        Self { rsp: 0 }
+        Self {
+            r12: 0,
+            r13: 0,
+            r14: 0,
+            r15: 0,
+            rbx: 0,
+            rbp: 0,
+            rsp: 0,
+            rip: 0,
+            rflags: 0x200, // IF = 1 by default.
+        }
+    }
+
+    pub fn ip(&mut self, ip: usize) {
+        self.rip = ip as u64;
+    }
+
+    pub fn sp(&mut self, sp: usize) {
+        self.rsp = sp as u64;
     }
 
-    pub fn init(&mut self, entry: usize, kstack_top: usize) {
-        unsafe {
-            let frame_ptr = (kstack_top as *mut ContextSwitchFrame).sub(1);
-            core::ptr::write(
-                frame_ptr,
-                ContextSwitchFrame {
-                    rip: entry as u64,
-                    eflags: 0x200,
-                    ..Default::default()
-                },
-            );
-            self.rsp = frame_ptr as u64;
+    pub fn call1(&mut self, func: unsafe extern "C" fn(usize) -> !, arg: [usize; 1]) {
+        self.ip(Self::do_call as _);
+        self.rbp = func as _;
+        self.r12 = arg[0] as _;
+    }
+
+    pub fn call2(&mut self, func: unsafe extern "C" fn(usize, usize) -> !, arg: [usize; 2]) {
+        self.ip(Self::do_call as _);
+        self.rbp = func as _;
+
+        (self.r12, self.r13) = (arg[0] as _, arg[1] as _);
+    }
+
+    pub fn call3(&mut self, func: unsafe extern "C" fn(usize, usize, usize) -> !, arg: [usize; 3]) {
+        self.ip(Self::do_call as _);
+        self.rbp = func as _;
+
+        (self.r12, self.r13, self.r14) = (arg[0] as _, arg[1] as _, arg[2] as _);
+    }
+
+    pub fn call4(
+        &mut self,
+        func: unsafe extern "C" fn(usize, usize, usize, usize) -> !,
+        arg: [usize; 4],
+    ) {
+        self.ip(Self::do_call as _);
+        self.rbp = func as _;
+
+        (self.r12, self.r13, self.r14, self.r15) =
+            (arg[0] as _, arg[1] as _, arg[2] as _, arg[3] as _);
+    }
+
+    pub fn call5(
+        &mut self,
+        func: unsafe extern "C" fn(usize, usize, usize, usize, usize) -> !,
+        arg: [usize; 5],
+    ) {
+        self.ip(Self::do_call as _);
+        self.rbp = func as _;
+
+        (self.r12, self.r13, self.r14, self.r15, self.rbx) = (
+            arg[0] as _,
+            arg[1] as _,
+            arg[2] as _,
+            arg[3] as _,
+            arg[4] as _,
+        );
+    }
+
+    pub fn interrupt(&mut self, is_enabled: bool) {
+        if is_enabled {
+            self.rflags |= 0x200; // IF = 1
+        } else {
+            self.rflags &= !0x200; // IF = 0
         }
     }
 
-    #[inline(always)]
-    pub fn switch_to(&mut self, next_task: &mut Self) {
-        unsafe { _switch_to(&mut self.rsp, &mut next_task.rsp) }
+    #[naked]
+    pub unsafe extern "C" fn switch(from: &mut Self, to: &mut Self) {
+        naked_asm!(
+            "pop %rax",
+            "pushf",
+            "pop %rcx",
+            "mov %r12, (%rdi)",
+            "mov %r13, 8(%rdi)",
+            "mov %r14, 16(%rdi)",
+            "mov %r15, 24(%rdi)",
+            "mov %rbx, 32(%rdi)",
+            "mov %rbp, 40(%rdi)",
+            "mov %rsp, 48(%rdi)",
+            "mov %rax, 56(%rdi)",
+            "mov %rcx, 64(%rdi)",
+            "",
+            "mov (%rsi), %r12",
+            "mov 8(%rsi), %r13",
+            "mov 16(%rsi), %r14",
+            "mov 24(%rsi), %r15",
+            "mov 32(%rsi), %rbx",
+            "mov 40(%rsi), %rbp",
+            "mov 48(%rsi), %rdi", // store next stack pointer
+            "mov 56(%rsi), %rax",
+            "mov 64(%rsi), %rcx",
+            "push %rcx",
+            "popf",
+            "xchg %rdi, %rsp", // switch to new stack
+            "jmp *%rax",
+            options(att_syntax),
+        );
     }
-}
 
-#[naked]
-unsafe extern "C" fn _switch_to(current_context_sp: &mut u64, next_context_sp: &mut u64) {
-    naked_asm!(
-        "pushf",
-        "push %rbp",
-        "push %rbx",
-        "push %r12",
-        "push %r13",
-        "push %r14",
-        "push %r15",
-        "mov %rsp, (%rdi)",
-        "mov (%rsi), %rsp",
-        "pop %r15",
-        "pop %r14",
-        "pop %r13",
-        "pop %r12",
-        "pop %rbx",
-        "pop %rbp",
-        "popf",
-        "ret",
-        options(att_syntax),
-    );
+    #[naked]
+    /// Maximum of 5 arguments supported.
+    unsafe extern "C" fn do_call() -> ! {
+        naked_asm!(
+            "mov %r12, %rdi",
+            "mov %r13, %rsi",
+            "mov %r14, %rdx",
+            "mov %r15, %rcx",
+            "mov %rbx, %r8",
+            "mov %rbp, %rax",
+            "xor %rbp, %rbp",
+            "jmp *%rax",
+            options(att_syntax),
+        );
+    }
 }
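
The offsets hard-coded in `switch` (0 through 64) mirror the declaration order of `TaskContext`, and `do_call` expects the entry point in rbp and the arguments in r12..rbx exactly as the `callN` helpers lay them out. A minimal sketch of driving this API, assuming it lives next to `TaskContext` (the fields are private) and that `task_entry` and the stack allocation are stand-ins:

    use core::mem::offset_of;

    // Compile-time guard: `switch` stores rsp at offset 48 and rip at offset 56,
    // so reordering the fields of `TaskContext` would corrupt saved contexts.
    const _: () = assert!(offset_of!(TaskContext, rsp) == 48);
    const _: () = assert!(offset_of!(TaskContext, rip) == 56);

    // Hypothetical entry point; `call1` requires `unsafe extern "C" fn(usize) -> !`.
    unsafe extern "C" fn task_entry(arg: usize) -> ! {
        loop {
            let _ = arg; // task body; `arg` arrived via r12 -> rdi in `do_call`
        }
    }

    fn enter_task_sketch(stack_top: usize) {
        let mut from = TaskContext::new(); // filled in by `switch` on the way out
        let mut to = TaskContext::new();
        to.sp(stack_top); // 16-byte aligned top of a freshly allocated stack
        to.call1(task_entry, [42]); // rip = do_call, rbp = task_entry, r12 = 42
        unsafe { TaskContext::switch(&mut from, &mut to) };
    }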

+ 185 - 143
arch/src/x86_64/interrupt.rs

@@ -34,161 +34,163 @@ global_asm!(
     .set SS, 0xa8
 
     .macro movcfi reg, offset
-    	mov \reg, \offset(%rsp)
-    	.cfi_rel_offset \reg, \offset
+        mov \reg, \offset(%rsp)
+        .cfi_rel_offset \reg, \offset
     .endm
 
     .macro movrst reg, offset
-    	mov \offset(%rsp), \reg
-    	.cfi_restore \reg
+        mov \offset(%rsp), \reg
+        .cfi_restore \reg
     .endm
 
     .globl ISR_stub_restore
     .type ISR_stub_restore @function
 
     ISR_stub:
-    	.cfi_startproc
-    	.cfi_signal_frame
-    	.cfi_def_cfa_offset 0x18
-    	.cfi_offset %rsp, 0x10
-
-    	cmpq $0x08, 24(%rsp)
-    	je 1f
-    	swapgs
-    
+        .cfi_startproc
+        .cfi_signal_frame
+        .cfi_def_cfa_offset 0x18
+        .cfi_offset %rsp, 0x10
+
+        cmpq $0x08, 24(%rsp)
+        je 1f
+        swapgs
+
     1:
-    	sub $0x78, %rsp
-    	.cfi_def_cfa_offset 0x90
-    
-    	movcfi %rax, RAX
-    	movcfi %rbx, RBX
-    	movcfi %rcx, RCX
-    	movcfi %rdx, RDX
-    	movcfi %rdi, RDI
-    	movcfi %rsi, RSI
-    	movcfi %r8,  R8
-    	movcfi %r9,  R9
-    	movcfi %r10, R10
-    	movcfi %r11, R11
-    	movcfi %r12, R12
-    	movcfi %r13, R13
-    	movcfi %r14, R14
-    	movcfi %r15, R15
-    	movcfi %rbp, RBP
-    
-    	mov INT_NO(%rsp), %rax
-    	sub $ISR0, %rax
-    	shr $3, %rax
-    	mov %rax, INT_NO(%rsp)
-    
-    	mov %rsp, %rbx
-    	.cfi_def_cfa_register %rbx
-    
-    	and $~0xf, %rsp
-    	sub $512, %rsp
-    	fxsave (%rsp)
-    
-    	mov %rbx, %rdi
-    	mov %rsp, %rsi
-    	call interrupt_handler
-    
+        sub $0x78, %rsp
+        .cfi_def_cfa_offset 0x90
+
+        movcfi %rax, RAX
+        movcfi %rbx, RBX
+        movcfi %rcx, RCX
+        movcfi %rdx, RDX
+        movcfi %rdi, RDI
+        movcfi %rsi, RSI
+        movcfi %r8,  R8
+        movcfi %r9,  R9
+        movcfi %r10, R10
+        movcfi %r11, R11
+        movcfi %r12, R12
+        movcfi %r13, R13
+        movcfi %r14, R14
+        movcfi %r15, R15
+        movcfi %rbp, RBP
+
+        mov INT_NO(%rsp), %rax
+        sub $ISR0, %rax
+        shr $3, %rax
+        mov %rax, INT_NO(%rsp)
+
+        mov %rsp, %rbx
+        .cfi_def_cfa_register %rbx
+
+        and $~0xf, %rsp
+        sub $512, %rsp
+        fxsave (%rsp)
+
+        mov %rbx, %rdi
+        mov %rsp, %rsi
+        call interrupt_handler
+
     ISR_stub_restore:
-    	fxrstor (%rsp)
-    	mov %rbx, %rsp
-    	.cfi_def_cfa_register %rsp
-    
-    	movrst %rax, RAX
-    	movrst %rbx, RBX
-    	movrst %rcx, RCX
-    	movrst %rdx, RDX
-    	movrst %rdi, RDI
-    	movrst %rsi, RSI
-    	movrst %r8,  R8
-    	movrst %r9,  R9
-    	movrst %r10, R10
-    	movrst %r11, R11
-    	movrst %r12, R12
-    	movrst %r13, R13
-    	movrst %r14, R14
-    	movrst %r15, R15
-    	movrst %rbp, RBP
-    
-    	add $0x88, %rsp
-    	.cfi_def_cfa_offset 0x08
-    
-    	cmpq $0x08, 8(%rsp)
-    	je 1f
-    	swapgs
-    
+        fxrstor (%rsp)
+        mov %rbx, %rsp
+        .cfi_def_cfa_register %rsp
+
+    .globl _arch_fork_return
+    _arch_fork_return:
+        movrst %rax, RAX
+        movrst %rbx, RBX
+        movrst %rcx, RCX
+        movrst %rdx, RDX
+        movrst %rdi, RDI
+        movrst %rsi, RSI
+        movrst %r8,  R8
+        movrst %r9,  R9
+        movrst %r10, R10
+        movrst %r11, R11
+        movrst %r12, R12
+        movrst %r13, R13
+        movrst %r14, R14
+        movrst %r15, R15
+        movrst %rbp, RBP
+
+        add $0x88, %rsp
+        .cfi_def_cfa_offset 0x08
+
+        cmpq $0x08, 8(%rsp)
+        je 1f
+        swapgs
+
     1:
-    	iretq
-    	.cfi_endproc
-    
+        iretq
+        .cfi_endproc
+
     .altmacro
     .macro build_isr_no_err name
-    	.align 8
-    	.globl ISR\name
-    	.type  ISR\name @function
-    	ISR\name:
-    		.cfi_startproc
-    		.cfi_signal_frame
-    		.cfi_def_cfa_offset 0x08
-    		.cfi_offset %rsp, 0x10
-    
-    		.cfi_same_value %rax
-    		.cfi_same_value %rbx
-    		.cfi_same_value %rcx
-    		.cfi_same_value %rdx
-    		.cfi_same_value %rdi
-    		.cfi_same_value %rsi
-    		.cfi_same_value %r8
-    		.cfi_same_value %r9
-    		.cfi_same_value %r10
-    		.cfi_same_value %r11
-    		.cfi_same_value %r12
-    		.cfi_same_value %r13
-    		.cfi_same_value %r14
-    		.cfi_same_value %r15
-    		.cfi_same_value %rbp
-    
-    		push %rbp # push placeholder for error code
-    		.cfi_def_cfa_offset 0x10
-    
-    		call ISR_stub
-    		.cfi_endproc
+        .align 8
+        .globl ISR\name
+        .type  ISR\name @function
+        ISR\name:
+            .cfi_startproc
+            .cfi_signal_frame
+            .cfi_def_cfa_offset 0x08
+            .cfi_offset %rsp, 0x10
+
+            .cfi_same_value %rax
+            .cfi_same_value %rbx
+            .cfi_same_value %rcx
+            .cfi_same_value %rdx
+            .cfi_same_value %rdi
+            .cfi_same_value %rsi
+            .cfi_same_value %r8
+            .cfi_same_value %r9
+            .cfi_same_value %r10
+            .cfi_same_value %r11
+            .cfi_same_value %r12
+            .cfi_same_value %r13
+            .cfi_same_value %r14
+            .cfi_same_value %r15
+            .cfi_same_value %rbp
+
+            push %rbp # push placeholder for error code
+            .cfi_def_cfa_offset 0x10
+
+            call ISR_stub
+            .cfi_endproc
     .endm
-    
+
     .altmacro
     .macro build_isr_err name
-    	.align 8
-    	.globl ISR\name
-    	.type  ISR\name @function
-    	ISR\name:
-    		.cfi_startproc
-    		.cfi_signal_frame
-    		.cfi_def_cfa_offset 0x10
-    		.cfi_offset %rsp, 0x10
-    
-    		.cfi_same_value %rax
-    		.cfi_same_value %rbx
-    		.cfi_same_value %rcx
-    		.cfi_same_value %rdx
-    		.cfi_same_value %rdi
-    		.cfi_same_value %rsi
-    		.cfi_same_value %r8
-    		.cfi_same_value %r9
-    		.cfi_same_value %r10
-    		.cfi_same_value %r11
-    		.cfi_same_value %r12
-    		.cfi_same_value %r13
-    		.cfi_same_value %r14
-    		.cfi_same_value %r15
-    		.cfi_same_value %rbp
-    
-    		call ISR_stub
-    		.cfi_endproc
+        .align 8
+        .globl ISR\name
+        .type  ISR\name @function
+        ISR\name:
+            .cfi_startproc
+            .cfi_signal_frame
+            .cfi_def_cfa_offset 0x10
+            .cfi_offset %rsp, 0x10
+
+            .cfi_same_value %rax
+            .cfi_same_value %rbx
+            .cfi_same_value %rcx
+            .cfi_same_value %rdx
+            .cfi_same_value %rdi
+            .cfi_same_value %rsi
+            .cfi_same_value %r8
+            .cfi_same_value %r9
+            .cfi_same_value %r10
+            .cfi_same_value %r11
+            .cfi_same_value %r12
+            .cfi_same_value %r13
+            .cfi_same_value %r14
+            .cfi_same_value %r15
+            .cfi_same_value %rbp
+
+            call ISR_stub
+            .cfi_endproc
     .endm
-    
+
     build_isr_no_err 0
     build_isr_no_err 1
     build_isr_no_err 2
@@ -221,20 +223,20 @@ global_asm!(
     build_isr_err    29
     build_isr_err    30
     build_isr_no_err 31
-    
+
     .set i, 32
     .rept 0x80+1
-    	build_isr_no_err %i
-    	.set i, i+1
+        build_isr_no_err %i
+        .set i, i+1
     .endr
-    
+
     .section .rodata
-    
+
     .align 8
     .globl ISR_START_ADDR
     .type  ISR_START_ADDR @object
     ISR_START_ADDR:
-    	.quad ISR0
+        .quad ISR0
     ",
     options(att_syntax),
 );
@@ -304,6 +306,42 @@ pub struct InterruptControl {
     apic_base: APICRegs,
 }
 
+impl InterruptContext {
+    pub fn set_return_value(&mut self, value: u64) {
+        // The return value is stored in rax.
+        self.rax = value;
+    }
+
+    pub fn set_return_address(&mut self, addr: u64, user: bool) {
+        // The return address is stored in rip.
+        self.rip = addr;
+        if user {
+            self.cs = 0x2b; // User code segment
+        } else {
+            self.cs = 0x08; // Kernel code segment
+        }
+    }
+
+    pub fn set_stack_pointer(&mut self, sp: u64, user: bool) {
+        // The stack pointer is stored in rsp.
+        self.rsp = sp;
+        if user {
+            self.ss = 0x33; // User stack segment
+        } else {
+            self.ss = 0x10; // Kernel stack segment
+        }
+    }
+
+    pub fn set_interrupt_enabled(&mut self, enabled: bool) {
+        // The interrupt state is stored in eflags.
+        if enabled {
+            self.eflags |= 0x200; // Set the interrupt flag
+        } else {
+            self.eflags &= !0x200; // Clear the interrupt flag
+        }
+    }
+}
+
 impl IDTEntry {
     const fn new(offset: usize, selector: u16, attributes: u8) -> Self {
         Self {
@@ -465,6 +503,10 @@ pub fn disable_irqs() {
     }
 }
 
+extern "C" {
+    pub fn _arch_fork_return();
+}
+
 fn lidt(base: usize, limit: u16) {
     let mut idt_descriptor = [0u16; 5];
 

+ 7 - 0
crates/atomic_unique_refcell/Cargo.lock

@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "atomic_unique_refcell"
+version = "0.1.0"

+ 10 - 0
crates/atomic_unique_refcell/Cargo.toml

@@ -0,0 +1,10 @@
+[package]
+name = "atomic_unique_refcell"
+version = "0.1.0"
+edition = "2024"
+
+[features]
+default = []
+no_std = []
+
+[dependencies]

+ 104 - 0
crates/atomic_unique_refcell/src/lib.rs

@@ -0,0 +1,104 @@
+#![cfg_attr(feature = "no_std", no_std)]
+
+#[cfg(feature = "no_std")]
+use core::{
+    cell::UnsafeCell,
+    marker::PhantomData,
+    ops::{Deref, DerefMut},
+    sync::atomic::{AtomicBool, Ordering},
+};
+
+#[cfg(not(feature = "no_std"))]
+use std::{
+    cell::UnsafeCell,
+    marker::PhantomData,
+    ops::{Deref, DerefMut},
+    sync::atomic::{AtomicBool, Ordering},
+};
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test() {
+        let cell = AtomicUniqueRefCell::new(42);
+        let mut ref_cell = cell.borrow();
+        assert_eq!(*ref_cell, 42);
+        *ref_cell = 43;
+        assert_eq!(*ref_cell, 43);
+    }
+}
+
+/// `AtomicUniqueRefCell` implements `Send` and `Sync` if `T` is `Send`.
+/// The following code will not compile if `T` is not `Send`.
+///
+/// ```compile_fail
+/// use atomic_unique_refcell::AtomicUniqueRefCell;
+///
+/// struct NotSend {
+///     data: *mut (),
+/// }
+///
+/// struct Test {
+///     data: AtomicUniqueRefCell<NotSend>,
+/// }
+///
+/// trait TestTrait: Send + Sync {}
+///
+/// impl TestTrait for Test {}
+/// ```
+pub struct AtomicUniqueRefCell<T: ?Sized> {
+    count: AtomicBool,
+    inner: UnsafeCell<T>,
+}
+
+unsafe impl<T: ?Sized + Send> Send for AtomicUniqueRefCell<T> {}
+unsafe impl<T: ?Sized + Send> Sync for AtomicUniqueRefCell<T> {}
+
+pub struct Ref<'a, T: ?Sized> {
+    inner: &'a AtomicUniqueRefCell<T>,
+    _marker: PhantomData<UnsafeCell<T>>,
+}
+
+impl<T> AtomicUniqueRefCell<T> {
+    pub fn new(value: T) -> Self {
+        Self {
+            count: AtomicBool::new(false),
+            inner: UnsafeCell::new(value),
+        }
+    }
+}
+
+impl<T: ?Sized> AtomicUniqueRefCell<T> {
+    pub fn borrow(&self) -> Ref<'_, T> {
+        if self.count.swap(true, Ordering::Acquire) {
+            panic!("Already borrowed");
+        }
+
+        Ref {
+            inner: self,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<T: ?Sized> Deref for Ref<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.inner.inner.get() }
+    }
+}
+
+impl<T: ?Sized> DerefMut for Ref<'_, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.inner.inner.get() }
+    }
+}
+
+impl<T: ?Sized> Drop for Ref<'_, T> {
+    fn drop(&mut self) {
+        self.inner.count.swap(false, Ordering::Release);
+    }
+}
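
`borrow` takes the flag with `Ordering::Acquire` and the `Drop` impl releases it with `Ordering::Release`, so writes made under one borrow happen-before the next successful borrow, even from another thread. A usage sketch for the default (std) build:

    use std::sync::Arc;
    use std::thread;
    use atomic_unique_refcell::AtomicUniqueRefCell;

    fn main() {
        let cell = Arc::new(AtomicUniqueRefCell::new(vec![1, 2, 3]));

        let cell2 = cell.clone();
        let worker = thread::spawn(move || {
            // Exclusive access: a second concurrent `borrow` would panic
            // with "Already borrowed" instead of racing.
            let mut data = cell2.borrow();
            data.push(4);
        }); // dropping the `Ref` releases the flag

        worker.join().unwrap();
        assert_eq!(*cell.borrow(), vec![1, 2, 3, 4]);
    }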

+ 1 - 1
pretty-print.py

@@ -526,5 +526,5 @@ gdb.execute('skip -rfu ^alloc::([a-zA-Z0-9_]+::)*[a-zA-Z0-9_<>]+')
 gdb.execute('skip -rfu ^std::([a-zA-Z0-9_]+::)*[a-zA-Z0-9_<>]+')
 gdb.execute('skip -rfu "^gbos_rust_part::sync::lock::Lock<[a-zA-Z0-9_<>: ,]+, [a-zA-Z0-9_<>: ,]+>::new<[a-zA-Z0-9_<>: ,]+, [a-zA-Z0-9_<>: ,]+>"')
 gdb.execute('skip -rfu "^gbos_rust_part::sync::locked::Locked<[a-zA-Z0-9_<>: ,]+, [a-zA-Z0-9_<>: ,]+>::new<[a-zA-Z0-9_<>: ,]+, [a-zA-Z0-9_<>: ,]+>"')
-gdb.execute('source ' + environ['HOME'] + '/.rustup/toolchains/nightly-aarch64-apple-darwin/lib/rustlib/etc/gdb_load_rust_pretty_printers.py')
+# gdb.execute('source ' + environ['HOME'] + '/.rustup/toolchains/nightly-aarch64-apple-darwin/lib/rustlib/etc/gdb_load_rust_pretty_printers.py')
 gdb.pretty_printers.append(build_pretty_printer)

+ 21 - 1
src/kernel/console.rs

@@ -72,6 +72,7 @@ macro_rules! println_debug {
     };
 }
 
+#[allow(unused_macros)]
 macro_rules! println_info {
     ($($arg:tt)*) => {
         $crate::println!("[kernel: info] {}", format_args!($($arg)*))
@@ -87,6 +88,25 @@ macro_rules! println_fatal {
     };
 }
 
+macro_rules! println_trace {
+    ($feat:literal) => {
+        #[deny(unexpected_cfgs)]
+        {
+            #[cfg(feature = $feat)]
+            $crate::println!("[kernel:trace] ")
+        }
+    };
+    ($feat:literal, $($arg:tt)*) => {{
+        #[deny(unexpected_cfgs)]
+        {
+            #[cfg(feature = $feat)]
+            $crate::println!("[kernel:trace] {}", format_args!($($arg)*))
+        }
+    }};
+}
+
 use super::terminal::Terminal;
 
-pub(crate) use {print, println, println_debug, println_fatal, println_info, println_warn};
+pub(crate) use {
+    print, println, println_debug, println_fatal, println_info, println_trace, println_warn,
+};
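
A call-site sketch for the new macro: the first argument names a feature from Cargo.toml, and `#[deny(unexpected_cfgs)]` turns a misspelled feature into a build error instead of a silently disabled trace (`task_id` below is a hypothetical variable):

    // Prints only when built with `--features trace_scheduler`;
    // expands to nothing (but still checks the feature name) otherwise.
    println_trace!("trace_scheduler", "picked task id({})", task_id);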

+ 2 - 1
src/kernel/mem/mm_list.rs

@@ -278,7 +278,8 @@ impl MMList {
     pub unsafe fn release(&self) {
         // TODO: Check whether we should wake someone up if they've been put to sleep when calling `vfork`.
         self.inner.swap(None);
-        self.root_page_table.store(0, Ordering::Relaxed);
+        self.root_page_table
+            .swap(KERNEL_PML4 as _, Ordering::Relaxed);
     }
 
     /// No need to do invalidation manually, `PageTable` already does it.

+ 5 - 33
src/kernel/smp.rs

@@ -1,20 +1,16 @@
-use alloc::{format, sync::Arc};
 use arch::define_smp_bootstrap;
 
 use crate::{
     kernel::{
         cpu::current_cpu,
         mem::{paging::Page, phys::PhysPtr as _},
-        task::{Process, Thread},
+        task::Task,
     },
     println_debug,
     sync::preempt,
 };
 
-use super::{
-    cpu::init_thiscpu,
-    task::{ProcessList, Scheduler},
-};
+use super::{cpu::init_thiscpu, task::Scheduler};
 
 define_smp_bootstrap!(4, ap_entry, {
     let page = Page::alloc_many(9);
@@ -23,40 +19,16 @@ define_smp_bootstrap!(4, ap_entry, {
     stack_bottom
 });
 
-unsafe extern "C" fn ap_entry() {
+unsafe extern "C" fn ap_entry() -> ! {
     init_thiscpu();
+    Scheduler::init_scheduler_thiscpu();
     println_debug!("AP{} started", current_cpu().cpuid());
 
-    {
-        let mut procs = ProcessList::get().lock_nosleep();
-        let idle_process = procs.idle_process().clone();
-
-        let idle_thread_name = format!("[kernel idle#AP{}]", 0);
-        let idle_thread = Thread::new_for_init(
-            Arc::from(idle_thread_name.as_bytes()),
-            Process::alloc_pid(),
-            &idle_process,
-            procs.as_mut(),
-        );
-        Scheduler::set_idle_and_current(idle_thread);
-    }
-
     preempt::disable();
     arch::enable_irqs();
 
     // TODO!!!!!: Free the stack after having switched to idle task.
-
-    // TODO: Temporary solution: we will never access this later on.
-    let mut unuse_ctx = arch::TaskContext::new();
-    let mut unused_area = [0u8; 64];
-    unuse_ctx.init(0, unused_area.as_mut_ptr() as usize);
-    unsafe {
-        arch::TaskContext::switch_to(
-            &mut unuse_ctx, // We will never come back
-            &mut *Scheduler::idle_task().get_context_mut_ptr(),
-        );
-    }
-    arch::freeze()
+    Task::switch_noreturn(&Task::idle());
 }
 
 pub unsafe fn bootstrap_smp() {

+ 19 - 18
src/kernel/syscall.rs

@@ -96,6 +96,7 @@ macro_rules! arg_register {
     };
 }
 
+#[allow(unused_macros)]
 macro_rules! format_expand {
     ($name:ident, $arg:tt) => {
         format_args!("{}: {:x?}", stringify!($name), $arg)
@@ -108,34 +109,34 @@ macro_rules! format_expand {
 macro_rules! syscall32_call {
     ($is:ident, $handler:ident, $($arg:ident: $type:ty),*) => {{
         use $crate::kernel::syscall::{MapArgument, MapArgumentImpl, arg_register};
+        #[allow(unused_imports)]
         use $crate::kernel::syscall::{MapReturnValue, format_expand};
-        use $crate::{kernel::task::Thread, println_info};
+        #[allow(unused_imports)]
+        use $crate::{kernel::task::Thread, println_trace};
 
         $(
             let $arg: $type =
                 MapArgumentImpl::map_arg(arg_register!(${index()}, $is));
         )*
 
-        if cfg!(feature = "debug_syscall") {
-            println_info!(
-                "tid{}: {}({}) => {{",
-                Thread::current().tid,
-                stringify!($handler),
-                format_expand!($($arg, $arg),*),
-            );
-        }
+        println_trace!(
+            "trace_syscall",
+            "tid{}: {}({}) => {{",
+            Thread::current().tid,
+            stringify!($handler),
+            format_expand!($($arg, $arg),*),
+        );
 
         let result = $handler($($arg),*);
 
-        if cfg!(feature = "debug_syscall") {
-            println_info!(
-                "tid{}: {}({}) => }} = {:x?}",
-                Thread::current().tid,
-                stringify!($handler),
-                format_expand!($($arg, $arg),*),
-                result
-            );
-        }
+        println_trace!(
+            "trace_syscall",
+            "tid{}: {}({}) => }} = {:x?}",
+            Thread::current().tid,
+            stringify!($handler),
+            format_expand!($($arg, $arg),*),
+            result
+        );
 
         match result {
             Ok(val) => MapReturnValue::map_ret(val),

+ 38 - 15
src/kernel/syscall/procops.rs

@@ -11,7 +11,8 @@ use crate::kernel::constants::{
 };
 use crate::kernel::mem::{Page, PageBuffer, VAddr};
 use crate::kernel::task::{
-    ProcessList, Scheduler, Signal, SignalAction, Thread, UserDescriptor, WaitObject, WaitType,
+    ProcessBuilder, ProcessList, Scheduler, Signal, SignalAction, Task, Thread, ThreadBuilder,
+    ThreadRunnable, UserDescriptor, WaitObject, WaitType,
 };
 use crate::kernel::user::dataflow::UserString;
 use crate::kernel::user::{UserPointer, UserPointerMut};
@@ -156,15 +157,21 @@ fn sys_execve(int_stack: &mut InterruptContext, _: &mut ExtendedContext) -> usiz
     }
 }
 
-// TODO: Find a better way.
-#[allow(unreachable_code)]
-fn do_exit(status: u32) -> KResult<()> {
-    {
+fn sys_exit(int_stack: &mut InterruptContext, _: &mut ExtendedContext) -> usize {
+    let status = int_stack.rbx as u32;
+
+    unsafe {
         let mut procs = ProcessList::get().lock();
+        preempt::disable();
+
+        // SAFETY: Preemption is disabled.
         procs.do_kill_process(&Thread::current().process, WaitType::Exited(status));
     }
-    Scheduler::schedule_noreturn();
-    panic!("schedule_noreturn returned!");
+
+    unsafe {
+        // SAFETY: Preempt count == 1.
+        Thread::runnable().exit();
+    }
 }
 
 bitflags! {
@@ -542,7 +549,6 @@ fn do_chmod(pathname: *const u8, mode: u32) -> KResult<()> {
 define_syscall32!(sys_chdir, do_chdir, path: *const u8);
 define_syscall32!(sys_umask, do_umask, mask: u32);
 define_syscall32!(sys_getcwd, do_getcwd, buffer: *mut u8, bufsize: usize);
-define_syscall32!(sys_exit, do_exit, status: u32);
 define_syscall32!(sys_waitpid, do_waitpid, waitpid: u32, arg1: *mut u32, options: u32);
 define_syscall32!(sys_wait4, do_wait4, waitpid: u32, arg1: *mut u32, options: u32, rusage: *mut ());
 define_syscall32!(sys_setsid, do_setsid);
@@ -579,13 +585,30 @@ fn sys_vfork(int_stack: &mut InterruptContext, ext: &mut ExtendedContext) -> usi
 
 fn sys_fork(int_stack: &mut InterruptContext, _: &mut ExtendedContext) -> usize {
     let mut procs = ProcessList::get().lock();
-    let new_thread = Thread::current().new_cloned(procs.as_mut());
-    let mut new_int_stack = int_stack.clone();
-    new_int_stack.rax = 0;
-    new_int_stack.eflags = 0x200;
-    new_thread.fork_init(new_int_stack);
-    Scheduler::get().lock_irq().uwake(&new_thread);
-    new_thread.process.pid as usize
+
+    let current = Thread::current();
+    let current_process = current.process.clone();
+    let current_pgroup = current_process.pgroup(procs.as_pos()).clone();
+    let current_session = current_process.session(procs.as_pos()).clone();
+
+    let mut new_int_context = int_stack.clone();
+    new_int_context.set_return_value(0);
+
+    let thread_builder = ThreadBuilder::new().fork_from(&current);
+    let (new_thread, new_process) = ProcessBuilder::new()
+        .mm_list(current_process.mm_list.new_cloned())
+        .parent(current_process)
+        .pgroup(current_pgroup)
+        .session(current_session)
+        .thread_builder(thread_builder)
+        .build(&mut procs);
+
+    Scheduler::get().spawn(Task::new(ThreadRunnable::from_context(
+        new_thread,
+        new_int_context,
+    )));
+
+    new_process.pid as usize
 }
 
 fn sys_sigreturn(int_stack: &mut InterruptContext, ext_ctx: &mut ExtendedContext) -> usize {

+ 7 - 6
src/kernel/task.rs

@@ -1,18 +1,19 @@
-mod kstack;
 mod process;
 mod process_group;
 mod process_list;
+mod readyqueue;
 mod scheduler;
 mod session;
 mod signal;
+mod task;
 mod thread;
 
-pub(self) use kstack::KernelStack;
-
-pub use process::{Process, WaitObject, WaitType};
+pub use process::{Process, ProcessBuilder, WaitObject, WaitType};
 pub use process_group::ProcessGroup;
-pub use process_list::{init_multitasking, ProcessList};
+pub use process_list::ProcessList;
+pub use readyqueue::init_rq_thiscpu;
 pub use scheduler::Scheduler;
 pub use session::Session;
 pub use signal::{Signal, SignalAction};
-pub use thread::{Thread, ThreadState, UserDescriptor};
+pub use task::{FutureRunnable, Task, TaskContext};
+pub use thread::{Thread, ThreadBuilder, ThreadRunnable, UserDescriptor};

+ 0 - 44
src/kernel/task/kstack.rs

@@ -1,44 +0,0 @@
-use crate::kernel::{
-    cpu::current_cpu,
-    mem::{paging::Page, phys::PhysPtr},
-};
-use arch::InterruptContext;
-
-#[allow(dead_code)]
-pub struct KernelStack {
-    pages: Page,
-    bottom: usize,
-}
-
-impl KernelStack {
-    /// Kernel stack page order
-    /// 7 for `2^7 = 128 pages = 512 KiB`
-    const KERNEL_STACK_ORDER: u32 = 7;
-
-    pub fn new() -> Self {
-        let pages = Page::alloc_many(Self::KERNEL_STACK_ORDER);
-        let bottom = pages.as_cached().offset(pages.len()).as_ptr::<u8>() as usize;
-
-        Self { pages, bottom }
-    }
-
-    /// # Safety
-    /// This function is unsafe because it accesses the `current_cpu()`, which needs
-    /// to be called in a preemption disabled context.
-    pub unsafe fn load_interrupt_stack(&self) {
-        arch::load_interrupt_stack(current_cpu(), self.bottom as u64);
-    }
-
-    pub fn get_stack_bottom(&self) -> usize {
-        self.bottom
-    }
-
-    pub fn init(&self, interrupt_context: InterruptContext) -> usize {
-        let mut sp = self.bottom - core::mem::size_of::<InterruptContext>();
-        sp &= !0xf;
-        unsafe {
-            (sp as *mut InterruptContext).write(interrupt_context);
-        }
-        sp
-    }
-}

+ 109 - 49
src/kernel/task/process.rs

@@ -1,7 +1,4 @@
-use core::{
-    ptr::addr_of,
-    sync::atomic::{AtomicU32, Ordering},
-};
+use core::sync::atomic::{AtomicU32, Ordering};
 
 use alloc::{
     collections::{btree_map::BTreeMap, vec_deque::VecDeque},
@@ -19,7 +16,18 @@ use crate::{
     },
 };
 
-use super::{signal::RaiseResult, ProcessGroup, ProcessList, Session, Signal, Thread};
+use super::{
+    process_group::ProcessGroupBuilder, signal::RaiseResult, thread::ThreadBuilder, ProcessGroup,
+    ProcessList, Session, Signal, Thread,
+};
+
+pub struct ProcessBuilder {
+    mm_list: Option<MMList>,
+    parent: Option<Arc<Process>>,
+    thread_builder: Option<ThreadBuilder>,
+    pgroup: Option<Arc<ProcessGroup>>,
+    session: Option<Arc<Session>>,
+}
 
 #[derive(Debug)]
 pub struct Process {
@@ -103,9 +111,9 @@ impl WaitType {
     pub fn to_wstatus(self) -> u32 {
         match self {
             WaitType::Exited(status) => (status & 0xff) << 8,
-            WaitType::Signaled(signal) if signal.is_coredump() => signal.to_signum() | 0x80,
-            WaitType::Signaled(signal) => signal.to_signum(),
-            WaitType::Stopped(signal) => 0x7f | (signal.to_signum() << 8),
+            WaitType::Signaled(signal) if signal.is_coredump() => u32::from(signal) | 0x80,
+            WaitType::Signaled(signal) => u32::from(signal),
+            WaitType::Stopped(signal) => 0x7f | (u32::from(signal) << 8),
             WaitType::Continued => 0xffff,
         }
     }
@@ -125,47 +133,54 @@ impl WaitObject {
     }
 }
 
-/// PID 0 and 1 is created manually so we start from 2.
-static NEXT_PID: AtomicU32 = AtomicU32::new(2);
-impl Process {
-    pub fn alloc_pid() -> u32 {
-        NEXT_PID.fetch_add(1, Ordering::Relaxed)
+impl ProcessBuilder {
+    pub fn new() -> Self {
+        Self {
+            mm_list: None,
+            parent: None,
+            thread_builder: None,
+            pgroup: None,
+            session: None,
+        }
     }
 
-    pub fn new_cloned(other: &Arc<Self>, procs: &mut ProcessList) -> Arc<Self> {
-        let procs_addr = addr_of!(*procs);
+    pub fn mm_list(mut self, mm_list: MMList) -> Self {
+        self.mm_list = Some(mm_list);
+        self
+    }
 
-        // SAFETY: We are holding the process list lock.
-        let other_pgroup = unsafe { other.pgroup.load_locked().unwrap() };
-        let other_session = unsafe { other.session.load_locked().unwrap() };
+    pub fn parent(mut self, parent: Arc<Process>) -> Self {
+        self.parent = Some(parent);
+        self
+    }
 
-        let process = Arc::new(Self {
-            pid: Self::alloc_pid(),
-            wait_list: WaitList::new(),
-            mm_list: MMList::new_cloned(&other.mm_list),
-            parent: RCUPointer::new_with(other.clone()),
-            pgroup: RCUPointer::new_with(other_pgroup.clone()),
-            session: RCUPointer::new_with(other_session.clone()),
-            inner: Locked::new(
-                ProcessInner {
-                    children: BTreeMap::new(),
-                    threads: BTreeMap::new(),
-                },
-                procs_addr,
-            ),
-        });
+    pub fn thread_builder(mut self, thread_builder: ThreadBuilder) -> Self {
+        self.thread_builder = Some(thread_builder);
+        self
+    }
+
+    pub fn pgroup(mut self, pgroup: Arc<ProcessGroup>) -> Self {
+        self.pgroup = Some(pgroup);
+        self
+    }
+
+    pub fn session(mut self, session: Arc<Session>) -> Self {
+        self.session = Some(session);
+        self
+    }
 
-        procs.add_process(&process);
-        other.add_child(&process, procs.as_pos_mut());
-        other_pgroup.add_member(&process, procs.as_pos_mut());
-        process
+    fn alloc_pid() -> u32 {
+        static NEXT_PID: AtomicU32 = AtomicU32::new(1);
+        NEXT_PID.fetch_add(1, Ordering::Relaxed)
     }
 
-    pub(super) unsafe fn new_for_init(pid: u32, procs: &mut ProcessList) -> Arc<Self> {
-        Arc::new(Self {
-            pid,
+    pub fn build(self, process_list: &mut ProcessList) -> (Arc<Thread>, Arc<Process>) {
+        let mm_list = self.mm_list.unwrap_or_else(|| MMList::new());
+
+        let process = Arc::new(Process {
+            pid: Self::alloc_pid(),
             wait_list: WaitList::new(),
-            mm_list: MMList::new(),
+            mm_list,
             parent: RCUPointer::empty(),
             pgroup: RCUPointer::empty(),
             session: RCUPointer::empty(),
@@ -174,11 +189,50 @@ impl Process {
                     children: BTreeMap::new(),
                     threads: BTreeMap::new(),
                 },
-                procs,
+                process_list,
             ),
-        })
+        });
+
+        process_list.add_process(&process);
+
+        let thread_builder = self.thread_builder.expect("Thread builder is not set");
+        let thread = thread_builder
+            .process(process.clone())
+            .tid(process.pid)
+            .build(process_list);
+
+        let session = match self.session {
+            Some(session) => session,
+            None => Session::new(&process, process_list),
+        };
+
+        let pgroup = match self.pgroup {
+            Some(pgroup) => {
+                pgroup.add_member(&process, process_list.as_pos_mut());
+                pgroup
+            }
+            None => ProcessGroupBuilder::new()
+                .leader(&process)
+                .session(session.clone())
+                .build(process_list),
+        };
+
+        if let Some(parent) = &self.parent {
+            parent.add_child(&process, process_list.as_pos_mut());
+        }
+
+        // SAFETY: We are holding the process list lock.
+        unsafe {
+            process.parent.swap(self.parent);
+            process.pgroup.swap(Some(pgroup));
+            process.session.swap(Some(session));
+        }
+
+        (thread, process)
     }
+}
 
+impl Process {
     pub fn raise(&self, signal: Signal, procs: RefPosition<'_, ProcessList>) {
         let inner = self.inner.access(procs);
         for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
@@ -254,20 +308,23 @@ impl Process {
 
     /// Create a new session for the process.
     pub fn setsid(self: &Arc<Self>) -> KResult<u32> {
-        let mut procs = ProcessList::get().lock();
+        let mut process_list = ProcessList::get().lock();
         // If there exists a session that has the same sid as our pid, we can't create a new
         // session. The standard says that we should create a new process group and be the
         // only process in the new process group and session.
-        if procs.try_find_session(self.pid).is_some() {
+        if process_list.try_find_session(self.pid).is_some() {
             return Err(EPERM);
         }
-        let session = Session::new(procs.as_mut(), self);
-        let pgroup = session.new_group(procs.as_mut(), self);
+        let session = Session::new(self, &mut process_list);
+        let pgroup = ProcessGroupBuilder::new()
+            .leader(self)
+            .session(session.clone())
+            .build(&mut process_list);
 
         {
             let _old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap();
             let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap();
-            old_pgroup.remove_member(self.pid, procs.as_pos_mut());
+            old_pgroup.remove_member(self.pid, process_list.as_pos_mut());
             rcu_sync();
         }
 
@@ -308,7 +365,10 @@ impl Process {
                 return Err(EPERM);
             }
 
-            session.new_group(procs, self)
+            ProcessGroupBuilder::new()
+                .leader(self)
+                .session(session.clone())
+                .build(procs)
         };
 
         pgroup.remove_member(self.pid, procs.as_pos_mut());
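
The builder replaces both `new_cloned` and `new_for_init`: `sys_fork` above supplies parent, page tables, pgroup and session explicitly, while a first process can leave them unset and let `build` create a fresh session and process group. A hedged sketch of the latter (seeding `ThreadBuilder` with a name is not shown in this diff, so the bare `ThreadBuilder::new()` is an assumption):

    let mut procs = ProcessList::get().lock();

    // No parent/pgroup/session given: `build` allocates the next PID
    // (NEXT_PID starts at 1) and creates a new session and process group
    // with this process as leader.
    let (thread, process) = ProcessBuilder::new()
        .thread_builder(ThreadBuilder::new())
        .build(&mut procs);

    procs.set_init_process(process);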

+ 40 - 18
src/kernel/task/process_group.rs

@@ -10,7 +10,12 @@ use crate::{
 
 use super::{Process, ProcessList, Session, Signal};
 
-#[allow(dead_code)]
+pub struct ProcessGroupBuilder {
+    pgid: Option<u32>,
+    leader: Option<Weak<Process>>,
+    session: Option<Arc<Session>>,
+}
+
 #[derive(Debug)]
 pub struct ProcessGroup {
     pub pgid: u32,
@@ -20,28 +25,45 @@ pub struct ProcessGroup {
     pub processes: Locked<BTreeMap<u32, Weak<Process>>, ProcessList>,
 }
 
-impl ProcessGroup {
-    /// Don't use this function directly. Use `Session::new_group` instead.
-    pub(super) fn new(
-        leader: &Arc<Process>,
-        session: Weak<Session>,
-        procs: &mut ProcessList,
-    ) -> Arc<Self> {
-        let pgroup = Arc::new(Self {
-            pgid: leader.pid,
-            leader: Arc::downgrade(leader),
-            session,
-            processes: Locked::new(
-                BTreeMap::from([(leader.pid, Arc::downgrade(leader))]),
-                // SAFETY: `procs` must be the global process list, which won't be moved.
-                procs,
-            ),
+impl ProcessGroupBuilder {
+    pub const fn new() -> Self {
+        Self {
+            pgid: None,
+            leader: None,
+            session: None,
+        }
+    }
+
+    pub fn leader(mut self, leader: &Arc<Process>) -> Self {
+        self.pgid = Some(leader.pid);
+        self.leader = Some(Arc::downgrade(leader));
+        self
+    }
+
+    pub fn session(mut self, session: Arc<Session>) -> Self {
+        self.session = Some(session);
+        self
+    }
+
+    pub fn build(self, process_list: &mut ProcessList) -> Arc<ProcessGroup> {
+        let pgid = self.pgid.expect("PGID is not set");
+        let leader = self.leader.expect("Leader is not set");
+        let session = self.session.expect("Session is not set");
+
+        let pgroup = Arc::new(ProcessGroup {
+            pgid,
+            session: Arc::downgrade(&session),
+            processes: Locked::new(BTreeMap::from([(pgid, leader.clone())]), process_list),
+            leader,
         });
 
-        procs.add_pgroup(&pgroup);
+        process_list.add_pgroup(&pgroup);
+        session.add_member(process_list, &pgroup);
         pgroup
     }
+}
 
+impl ProcessGroup {
     pub(super) fn add_member(
         &self,
         process: &Arc<Process>,

+ 25 - 60
src/kernel/task/process_list.rs

@@ -12,16 +12,14 @@ use crate::{
 
 use lazy_static::lazy_static;
 
-use super::{Process, ProcessGroup, Scheduler, Session, Signal, Thread, WaitObject, WaitType};
+use super::{Process, ProcessGroup, Session, Signal, Thread, WaitObject, WaitType};
 
 pub struct ProcessList {
     /// The init process.
     init: Option<Arc<Process>>,
-    /// The kernel idle process.
-    idle: Option<Arc<Process>>,
-    /// All threads except the idle thread.
+    /// All threads.
     threads: BTreeMap<u32, Arc<Thread>>,
-    /// All processes except the idle process.
+    /// All processes.
     processes: BTreeMap<u32, Weak<Process>>,
     /// All process groups.
     pgroups: BTreeMap<u32, Weak<ProcessGroup>>,
@@ -33,7 +31,6 @@ lazy_static! {
     static ref GLOBAL_PROC_LIST: RwSemaphore<ProcessList> = {
         RwSemaphore::new(ProcessList {
             init: None,
-            idle: None,
             threads: BTreeMap::new(),
             processes: BTreeMap::new(),
             pgroups: BTreeMap::new(),
@@ -64,10 +61,18 @@ impl ProcessList {
     }
 
     pub fn kill_current(signal: Signal) -> ! {
-        ProcessList::get()
-            .lock()
-            .do_kill_process(&Thread::current().process, WaitType::Signaled(signal));
-        Scheduler::schedule_noreturn()
+        unsafe {
+            let mut process_list = ProcessList::get().lock();
+            preempt::disable();
+
+            // SAFETY: Preemption disabled.
+            process_list.do_kill_process(&Thread::current().process, WaitType::Signaled(signal));
+        }
+
+        unsafe {
+            // SAFETY: Preempt count == 1.
+            Thread::runnable().exit();
+        }
     }
 
     pub fn remove_process(&mut self, pid: u32) {
@@ -94,12 +99,13 @@ impl ProcessList {
         }
     }
 
-    pub fn init_process(&self) -> &Arc<Process> {
-        self.init.as_ref().unwrap()
+    pub fn set_init_process(&mut self, init: Arc<Process>) {
+        let old_init = self.init.replace(init);
+        assert!(old_init.is_none(), "Init process already set");
     }
 
-    pub fn idle_process(&self) -> &Arc<Process> {
-        self.idle.as_ref().unwrap()
+    pub fn init_process(&self) -> &Arc<Process> {
+        self.init.as_ref().unwrap()
     }
 
     pub fn try_find_thread(&self, tid: u32) -> Option<&Arc<Thread>> {
@@ -119,23 +125,19 @@ impl ProcessList {
     }
 
     /// Make the process a zombie and notify the parent.
-    pub fn do_kill_process(&mut self, process: &Arc<Process>, status: WaitType) {
-        if self.idle_process().pid == process.pid {
-            panic!("idle exited");
-        }
-
-        if self.init_process().pid == process.pid {
+    /// # Safety
+    /// This function needs to be called with preemption disabled.
+    pub unsafe fn do_kill_process(&mut self, process: &Arc<Process>, status: WaitType) {
+        if process.pid == 1 {
             panic!("init exited");
         }
 
-        preempt::disable();
-
         let inner = process.inner.access_mut(self.as_pos_mut());
         // TODO!!!!!!: When we are killing multiple threads, we need to wait until all
         // the threads are stopped then proceed.
         for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
             assert!(thread.tid == Thread::current().tid);
-            Scheduler::get().lock().set_zombie(&thread);
+            // TODO: Send SIGKILL to all threads.
             thread.files.close_all();
         }
 
@@ -181,42 +183,5 @@ impl ProcessList {
             },
             self.as_pos(),
         );
-
-        preempt::enable();
     }
 }
-
-pub unsafe fn init_multitasking(init_fn: unsafe extern "C" fn()) {
-    let mut procs = ProcessList::get().lock();
-
-    let init_process = Process::new_for_init(1, procs.as_mut());
-    let init_thread = Thread::new_for_init(
-        Arc::from(b"[kernel kinit]".as_slice()),
-        1,
-        &init_process,
-        procs.as_mut(),
-    );
-
-    let init_session = Session::new(procs.as_mut(), &init_process);
-    let init_pgroup = init_session.new_group(procs.as_mut(), &init_process);
-
-    assert!(init_process.session.swap(Some(init_session)).is_none());
-    assert!(init_process.pgroup.swap(Some(init_pgroup)).is_none());
-
-    let idle_process = Process::new_for_init(0, procs.as_mut());
-    let idle_thread = Thread::new_for_init(
-        Arc::from(b"[kernel idle#BS]".as_slice()),
-        0,
-        &idle_process,
-        procs.as_mut(),
-    );
-
-    procs.init = Some(init_process);
-    procs.idle = Some(idle_process);
-
-    let mut scheduler = Scheduler::get().lock_irq();
-
-    init_thread.init(init_fn as usize);
-    scheduler.uwake(&init_thread);
-    Scheduler::set_idle_and_current(idle_thread);
-}

+ 48 - 0
src/kernel/task/readyqueue.rs

@@ -0,0 +1,48 @@
+use alloc::{collections::VecDeque, sync::Arc};
+
+use crate::sync::Spin;
+
+use super::Task;
+
+#[arch::define_percpu]
+static READYQUEUE: Option<Spin<FifoReadyQueue>> = None;
+
+pub trait ReadyQueue {
+    fn get(&mut self) -> Option<Arc<Task>>;
+    fn put(&mut self, thread: Arc<Task>);
+}
+
+pub struct FifoReadyQueue {
+    threads: VecDeque<Arc<Task>>,
+}
+
+impl FifoReadyQueue {
+    pub const fn new() -> Self {
+        FifoReadyQueue {
+            threads: VecDeque::new(),
+        }
+    }
+}
+
+impl ReadyQueue for FifoReadyQueue {
+    fn get(&mut self) -> Option<Arc<Task>> {
+        self.threads.pop_front()
+    }
+
+    fn put(&mut self, thread: Arc<Task>) {
+        self.threads.push_back(thread);
+    }
+}
+
+pub fn rq_thiscpu() -> &'static Spin<dyn ReadyQueue> {
+    // SAFETY: On this CPU the queue is only ever accessed through the returned
+    //         reference under `lock_irq()`, and other CPUs never touch this
+    //         CPU's queue, so handing out a shared reference cannot race.
+    unsafe { READYQUEUE.as_ref() }
+        .as_ref()
+        .expect("ReadyQueue should be initialized")
+}
+
+pub fn init_rq_thiscpu() {
+    READYQUEUE.set(Some(Spin::new(FifoReadyQueue::new())));
+}

+ 134 - 158
src/kernel/task/scheduler.rs

@@ -1,162 +1,118 @@
 use core::{
+    future::Future,
+    pin::Pin,
     ptr::NonNull,
-    sync::atomic::{compiler_fence, fence, Ordering},
+    sync::atomic::{compiler_fence, Ordering},
+    task::{Context, Poll, Waker},
 };
 
-use crate::{prelude::*, sync::preempt};
+use crate::{kernel::console::println_trace, prelude::*, sync::preempt};
 
-use alloc::{collections::vec_deque::VecDeque, sync::Arc};
+use alloc::sync::Arc;
+
+use intrusive_collections::RBTree;
 use lazy_static::lazy_static;
 
-use super::{Thread, ThreadState};
+use super::{
+    init_rq_thiscpu,
+    readyqueue::rq_thiscpu,
+    task::{FutureRunnable, TaskAdapter, TaskHandle, TaskOutput},
+    Task,
+};
 
-pub struct Scheduler {
-    ready: VecDeque<Arc<Thread>>,
-}
+pub struct Scheduler;
 
-/// Idle task thread
-/// All the idle task threads belongs to `pid 0` and are pinned to the current cpu.
+pub struct JoinHandle<Output>(Arc<Spin<TaskOutput<Output>>>)
+where
+    Output: Send;
+
+/// Idle task
+/// All the idle tasks are pinned to the current cpu.
 #[arch::define_percpu]
-static IDLE_TASK: Option<NonNull<Thread>> = None;
+static IDLE_TASK: Option<NonNull<Task>> = None;
 
-/// Current thread
+/// Current running task
 #[arch::define_percpu]
-static CURRENT: Option<NonNull<Thread>> = None;
+static CURRENT: Option<NonNull<Task>> = None;
 
 lazy_static! {
-    static ref GLOBAL_SCHEDULER: Spin<Scheduler> = Spin::new(Scheduler {
-        ready: VecDeque::new(),
-    });
+    static ref TASKS: Spin<RBTree<TaskAdapter>> = Spin::new(RBTree::new(TaskAdapter::new()));
 }
 
-impl Scheduler {
-    /// `Scheduler` might be used in various places. Do not hold it for a long time.
-    ///
-    /// # Safety
-    /// The locked returned by this function should be locked with `lock_irq` to prevent from
-    /// rescheduling during access to the scheduler. Disabling preemption will do the same.
-    ///
-    /// Drop the lock before calling `schedule`.
-    pub fn get() -> &'static Spin<Self> {
-        &GLOBAL_SCHEDULER
-    }
-
+impl Task {
     /// # Safety
     /// We should never "inspect" a change in `current`.
     /// The change of `CURRENT` will only happen in the scheduler. And if we are preempted,
     /// when we DO return, the `CURRENT` will be the same and remain valid.
-    pub fn current<'lt>() -> BorrowedArc<'lt, Thread> {
+    pub fn current<'a>() -> BorrowedArc<'a, Task> {
         BorrowedArc::from_raw(CURRENT.get().unwrap().as_ptr())
     }
 
     /// # Safety
     /// Idle task should never change so we can borrow it without touching the refcount.
-    pub fn idle_task() -> BorrowedArc<'static, Thread> {
+    pub fn idle() -> BorrowedArc<'static, Task> {
         BorrowedArc::from_raw(IDLE_TASK.get().unwrap().as_ptr())
     }
 
-    pub unsafe fn set_idle_and_current(thread: Arc<Thread>) {
-        // We don't wake the idle thread to prevent from accidentally being scheduled there.
-        thread.init(idle_task as *const () as usize);
-        assert_eq!(
-            thread.oncpu.swap(true, Ordering::AcqRel),
-            false,
-            "Idle task is already on cpu"
-        );
-
-        let old = IDLE_TASK.swap(NonNull::new(Arc::into_raw(thread.clone()) as *mut _));
-        assert!(old.is_none(), "Idle task is already set");
-
-        let old = CURRENT.swap(NonNull::new(Arc::into_raw(thread) as *mut _));
-        assert!(old.is_none(), "Current is already set");
-    }
-
-    pub fn pop(&mut self) -> Option<Arc<Thread>> {
-        self.ready.pop_front()
+    pub fn add(task: Arc<Self>) {
+        TASKS.lock().insert(task);
     }
 
-    pub unsafe fn swap_current(&mut self, next: Arc<Thread>) {
-        {
-            let mut next_state = next.state.lock();
-            assert_eq!(*next_state, ThreadState::Ready);
-            *next_state = ThreadState::Running;
-            assert_eq!(next.oncpu.swap(true, Ordering::AcqRel), false);
-        }
-
-        let old: Option<NonNull<Thread>> =
-            CURRENT.swap(NonNull::new(Arc::into_raw(next) as *mut _));
-
-        if let Some(thread_pointer) = old {
-            let thread = Arc::from_raw(thread_pointer.as_ptr());
-            let mut state = thread.state.lock();
-            assert_eq!(thread.oncpu.swap(false, Ordering::AcqRel), true);
-
-            if let ThreadState::Running = *state {
-                *state = ThreadState::Ready;
-                self.enqueue(&thread);
-            }
-        }
+    pub fn remove(&self) {
+        unsafe { TASKS.lock().cursor_mut_from_ptr(self as *const _) }.remove();
     }
+}
 
-    fn enqueue(&mut self, thread: &Arc<Thread>) {
-        self.ready.push_back(thread.clone());
+impl Scheduler {
+    /// `Scheduler` might be used in various places. Do not hold it for a long time.
+    ///
+    /// # Safety
+    /// The lock returned by this function should be locked with `lock_irq` to prevent
+    /// rescheduling during access to the scheduler. Disabling preemption will do the same.
+    ///
+    /// Drop the lock before calling `schedule`.
+    pub fn get() -> &'static Self {
+        static GLOBAL_SCHEDULER: Scheduler = Scheduler;
+        &GLOBAL_SCHEDULER
     }
 
-    pub fn usleep(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
-        // No need to dequeue. We have proved that the thread is running so not in the queue.
+    pub fn init_scheduler_thiscpu() {
+        let runnable = FutureRunnable::new(idle_task());
+        let (init_task, _) = Self::extract_handle(Task::new(runnable));
+        TASKS.lock().insert(init_task.clone());
 
-        *state = ThreadState::USleep;
+        init_rq_thiscpu();
+        Self::set_idle_and_current(init_task);
     }
 
-    pub fn uwake(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::USleep);
+    pub fn set_idle_and_current(task: Arc<Task>) {
+        task.set_usleep();
 
-        if thread.oncpu.load(Ordering::Acquire) {
-            *state = ThreadState::Running;
-        } else {
-            *state = ThreadState::Ready;
-            self.enqueue(&thread);
-        }
-    }
-
-    pub fn isleep(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
-        // No need to dequeue. We have proved that the thread is running so not in the queue.
+        let old = IDLE_TASK.swap(NonNull::new(Arc::into_raw(task.clone()) as *mut _));
+        assert!(old.is_none(), "Idle task is already set");
 
-        *state = ThreadState::ISleep;
+        let old = CURRENT.swap(NonNull::new(Arc::into_raw(task) as *mut _));
+        assert!(old.is_none(), "Current is already set");
     }
 
-    pub fn iwake(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-
-        match *state {
-            ThreadState::Ready | ThreadState::Running | ThreadState::USleep => return,
-            ThreadState::ISleep => {
-                if thread.oncpu.load(Ordering::Acquire) {
-                    *state = ThreadState::Running;
-                } else {
-                    *state = ThreadState::Ready;
-                    self.enqueue(&thread);
-                }
-            }
-            state => panic!("Invalid transition from state {:?} to `Ready`", state),
+    pub fn activate(&self, task: &Arc<Task>) {
+        // TODO: Select an appropriate ready queue to enqueue.
+        if !task.on_rq.swap(true, Ordering::AcqRel) {
+            rq_thiscpu().lock_irq().put(task.clone());
         }
     }
 
-    /// Set `Running` threads to the `Zombie` state.
-    pub fn set_zombie(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
+    pub fn spawn<O>(&self, task: TaskHandle<O>) -> JoinHandle<O>
+    where
+        O: Send,
+    {
+        let (task, output) = Self::extract_handle(task);
+        TASKS.lock().insert(task.clone());
+        self.activate(&task);
 
-        *state = ThreadState::Zombie;
+        JoinHandle(output)
     }
-}
 
-impl Scheduler {
     /// Go to idle task. Call this with `preempt_count == 1`.
     /// The preempt count will be decremented by this function.
     ///
@@ -173,7 +129,7 @@ impl Scheduler {
         //
         // Since we might never return to here, we can't take ownership of `current()`.
         // Is it safe to believe that `current()` will never change across calls?
-        context_switch_light(&Thread::current(), &Scheduler::idle_task());
+        Task::switch(&Task::current(), &Task::idle());
         preempt::enable();
     }
 
@@ -182,61 +138,81 @@ impl Scheduler {
         Self::schedule();
         panic!("Scheduler::schedule_noreturn(): Should never return")
     }
-}
 
-fn context_switch_light(from: &Arc<Thread>, to: &Arc<Thread>) {
-    unsafe {
-        arch::TaskContext::switch_to(
-            &mut *from.get_context_mut_ptr(),
-            &mut *to.get_context_mut_ptr(),
-        );
+    pub async fn yield_now() {
+        struct Yield(bool);
+
+        impl Future for Yield {
+            type Output = ();
+
+            fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+                match *self {
+                    Yield(true) => Poll::Ready(()),
+                    Yield(false) => {
+                        self.set(Yield(true));
+                        cx.waker().wake_by_ref();
+                        Poll::Pending
+                    }
+                }
+            }
+        }
+
+        Yield(false).await
     }
 }
 
-/// In this function, we should see `preempt_count == 1`.
-extern "C" fn idle_task() {
-    loop {
-        debug_assert_eq!(preempt::count(), 1);
+async fn idle_task() {
+    preempt::disable();
+    let mut cx = Context::from_waker(Waker::noop());
 
-        let mut scheduler = Scheduler::get().lock_irq();
-        let state = *Thread::current().state.lock();
+    loop {
+        debug_assert_eq!(
+            preempt::count(),
+            1,
+            "Scheduler::idle_task() preempt count != 1"
+        );
 
-        // No other thread to run
-        match scheduler.pop() {
+        let next = rq_thiscpu().lock().get();
+        match next {
+            None if Task::current().is_runnable() => {
+                println_trace!(
+                    "trace_scheduler",
+                    "Returning to task id({}) without doing context switch",
+                    Task::current().id
+                );
+
+                // The previous task is still `Running`; return to it directly.
+                Task::current().run(&mut cx);
+            }
             None => {
-                drop(scheduler);
-                if let ThreadState::Running = state {
-                    // Previous thread is `Running`, Return to current running thread
-                    // without changing its state.
-                    context_switch_light(&Scheduler::idle_task(), &Thread::current());
-                } else {
-                    // Halt the cpu and rerun the loop.
-                    arch::halt();
-                }
-                continue;
+                // Halt the cpu and rerun the loop.
+                arch::halt();
             }
             Some(next) => {
-                next.process.mm_list.switch_page_table();
-                unsafe { scheduler.swap_current(next) };
-                drop(scheduler);
-            }
-        }
+                println_trace!(
+                    "trace_scheduler",
+                    "Switching from task id({}) to task id({})",
+                    Task::current().id,
+                    next.id
+                );
+
+                debug_assert_ne!(next.id, Task::current().id, "Switching to the same task");
+
+                if let Some(task_pointer) =
+                    CURRENT.swap(NonNull::new(Arc::into_raw(next) as *mut _))
+                {
+                    let task = unsafe { Arc::from_raw(task_pointer.as_ptr()) };
+                    let mut rq = rq_thiscpu().lock();
+
+                    if task.is_runnable() {
+                        rq.put(task);
+                    } else {
+                        task.on_rq.store(false, Ordering::Release);
+                    }
+                }
 
-        unsafe {
-            // SAFETY: We are in the idle task where preemption is disabled.
-            //         So we can safely load the thread area and interrupt stack.
-            Thread::current().load_interrupt_stack();
-            Thread::current().load_thread_area32();
+                Task::current().run(&mut cx);
+            }
         }
-
-        // TODO!!!: If the task comes from another cpu, we need to sync.
-        //
-        // The other cpu should see the changes of kernel stack of the target thread
-        // made in this cpu.
-        //
-        // Can we find a better way other than `fence`s?
-        fence(Ordering::SeqCst);
-        context_switch_light(&Scheduler::idle_task(), &Thread::current());
-        fence(Ordering::SeqCst);
     }
 }
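
The `Yield` future above completes on its second poll: the first poll flips the flag, wakes the task through its own waker (which re-enqueues it via the `Wake` impl on `Task`), and returns `Pending`. A minimal sketch of a cooperative call site; `next_work_item` and its `process` method are hypothetical placeholders, not part of this change:

    async fn drain_work_queue() {
        while let Some(work) = next_work_item() {
            work.process();
            // The first poll wakes this task and returns `Pending`, putting
            // it back on the run queue; the next poll returns `Ready`.
            Scheduler::yield_now().await;
        }
    }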

+ 6 - 14
src/kernel/task/session.rs

@@ -31,7 +31,7 @@ pub struct Session {
 
 impl Session {
     /// Create a session and add it to the global session list.
-    pub(super) fn new(procs: &mut ProcessList, leader: &Arc<Process>) -> Arc<Self> {
+    pub fn new(leader: &Arc<Process>, process_list: &mut ProcessList) -> Arc<Self> {
         let session = Arc::new(Self {
             sid: leader.pid,
             leader: Arc::downgrade(leader),
@@ -42,26 +42,18 @@ impl Session {
             groups: Locked::new(
                 BTreeMap::new(),
-                // SAFETY: `procs` must be the global process list, which won't be moved.
+                // SAFETY: `process_list` must be the global process list, which won't be moved.
-                procs,
+                process_list,
             ),
         });
 
-        procs.add_session(&session);
+        process_list.add_session(&session);
         session
     }
 
-    pub(super) fn new_group(
-        self: &Arc<Self>,
-        procs: &mut ProcessList,
-        leader: &Arc<Process>,
-    ) -> Arc<ProcessGroup> {
-        let pgroup = ProcessGroup::new(leader, Arc::downgrade(self), procs);
+    pub(super) fn add_member(&self, procs: &mut ProcessList, pgroup: &Arc<ProcessGroup>) {
         let groups = self.groups.access_mut(procs.as_pos_mut());
-        assert!(groups
-            .insert(pgroup.pgid, Arc::downgrade(&pgroup))
-            .is_none());
-
-        pgroup
+        let old = groups.insert(pgroup.pgid, Arc::downgrade(pgroup));
+        assert!(old.is_none(), "Process group already exists");
     }
 
     pub(super) fn remove_member(&self, pgid: u32, procs: RefMutPosition<'_, ProcessList>) {
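
With `new_group` gone, creating and registering a process group are now two steps at the call site. A sketch of the composed flow, assuming `ProcessGroup::new` keeps the signature visible in the removed code above:

    fn create_group(
        session: &Arc<Session>,
        leader: &Arc<Process>,
        procs: &mut ProcessList,
    ) -> Arc<ProcessGroup> {
        let pgroup = ProcessGroup::new(leader, Arc::downgrade(session), procs);
        session.add_member(procs, &pgroup);
        pgroup
    }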

+ 115 - 57
src/kernel/task/signal.rs

@@ -1,4 +1,4 @@
-use core::cmp::Reverse;
+use core::{cmp::Reverse, task::Waker};
 
 use crate::{
     io::BufferFill,
@@ -7,13 +7,14 @@ use crate::{
         user::{dataflow::UserBuffer, UserPointer},
     },
     prelude::*,
+    sync::{preempt, AsRefPosition as _},
 };
 
 use alloc::collections::{binary_heap::BinaryHeap, btree_map::BTreeMap};
 use arch::{ExtendedContext, InterruptContext};
 use bindings::{EFAULT, EINVAL};
 
-use super::{ProcessList, Thread};
+use super::{ProcessList, Scheduler, Task, Thread, WaitObject, WaitType};
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Signal(u32);
@@ -63,67 +64,78 @@ pub struct SignalAction {
     pub sa_mask: usize,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 struct SignalListInner {
     mask: u64,
     pending: BinaryHeap<Reverse<Signal>>,
 
+    signal_waker: Option<Waker>,
+    stop_waker: Option<Waker>,
+
     // TODO!!!!!: Signal disposition should be per-process.
     handlers: BTreeMap<Signal, SignalAction>,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub struct SignalList {
     /// We might use this inside interrupt handler, so we need to use `lock_irq`.
     inner: Spin<SignalListInner>,
 }
 
+impl Clone for SignalList {
+    fn clone(&self) -> Self {
+        let inner = self.inner.lock();
+
+        debug_assert!(
+            inner.stop_waker.is_none(),
+            "We should not have a stop waker here"
+        );
+
+        Self {
+            inner: Spin::new(SignalListInner {
+                mask: inner.mask,
+                pending: BinaryHeap::new(),
+                signal_waker: None,
+                stop_waker: None,
+                handlers: inner.handlers.clone(),
+            }),
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy)]
 pub enum RaiseResult {
-    ShouldIWakeUp,
-    ShouldUWakeUp,
     Finished,
     Masked,
 }
 
 impl Signal {
-    fn is_continue(&self) -> bool {
-        self == &Self::SIGCONT
-    }
-
-    fn is_stop(&self) -> bool {
-        match self {
-            &Self::SIGSTOP | &Self::SIGTSTP | &Self::SIGTTIN | &Self::SIGTTOU => true,
+    const fn is_ignore(&self) -> bool {
+        match *self {
+            Self::SIGCHLD | Self::SIGURG | Self::SIGWINCH => true,
             _ => false,
         }
     }
 
-    fn is_ignore(&self) -> bool {
-        match self {
-            &Self::SIGCHLD | &Self::SIGURG | &Self::SIGWINCH => true,
+    pub const fn is_now(&self) -> bool {
+        match *self {
+            Self::SIGKILL | Self::SIGSTOP => true,
             _ => false,
         }
     }
 
-    pub fn is_now(&self) -> bool {
-        match self {
-            &Self::SIGKILL | &Self::SIGSTOP => true,
-            _ => false,
-        }
-    }
-
-    pub fn is_coredump(&self) -> bool {
-        match self {
-            &Self::SIGQUIT
-            | &Self::SIGILL
-            | &Self::SIGABRT
-            | &Self::SIGFPE
-            | &Self::SIGSEGV
-            | &Self::SIGBUS
-            | &Self::SIGTRAP
-            | &Self::SIGSYS
-            | &Self::SIGXCPU
-            | &Self::SIGXFSZ => true,
+    pub const fn is_coredump(&self) -> bool {
+        match *self {
+            Self::SIGQUIT
+            | Self::SIGILL
+            | Self::SIGABRT
+            | Self::SIGFPE
+            | Self::SIGSEGV
+            | Self::SIGBUS
+            | Self::SIGTRAP
+            | Self::SIGSYS
+            | Self::SIGXCPU
+            | Self::SIGXFSZ => true,
             _ => false,
         }
     }
@@ -131,10 +143,6 @@ impl Signal {
     fn to_mask(&self) -> u64 {
         1 << (self.0 - 1)
     }
-
-    pub fn to_signum(&self) -> u32 {
-        self.0
-    }
 }
 
 impl TryFrom<u32> for Signal {
@@ -149,6 +157,13 @@ impl TryFrom<u32> for Signal {
     }
 }
 
+impl From<Signal> for u32 {
+    fn from(signal: Signal) -> Self {
+        let Signal(signum) = signal;
+        signum
+    }
+}
+
 impl SignalAction {
     fn default_action() -> Self {
         Self {
@@ -172,7 +187,7 @@ impl SignalAction {
     /// # Might Sleep
     fn handle(
         &self,
-        signum: u32,
+        signal: Signal,
         old_mask: u64,
         int_stack: &mut InterruptContext,
         ext_ctx: &mut ExtendedContext,
@@ -194,7 +209,7 @@ impl SignalAction {
         let mut stack = UserBuffer::new(sp as *mut u8, CONTEXT_SIZE + size_of::<u32>())?;
 
         stack.copy(&restorer_address)?.ok_or(EFAULT)?; // Restorer address
-        stack.copy(&signum)?.ok_or(EFAULT)?; // Restorer address
+        stack.copy(&u32::from(signal))?.ok_or(EFAULT)?; // Restorer address
         stack.copy(&old_mask)?.ok_or(EFAULT)?; // Original signal mask
         stack.copy(ext_ctx)?.ok_or(EFAULT)?; // MMX registers
         stack.copy(int_stack)?.ok_or(EFAULT)?; // Interrupt stack
@@ -246,16 +261,21 @@ impl SignalListInner {
         self.mask(signal.to_mask());
         self.pending.push(Reverse(signal));
 
-        if signal.is_stop() {
-            return RaiseResult::ShouldIWakeUp;
-        }
+        match signal {
+            Signal::SIGCONT => {
+                if let Some(waker) = self.stop_waker.take() {
+                    waker.wake();
+                }
+            }
+            _ => {
+                let waker = self
+                    .signal_waker
+                    .as_ref()
+                    .expect("We should have a signal waker");
 
-        // TODO!!!!!!: Fix this. SIGCONT could wake up USleep threads.
-        if signal.is_continue() {
-            return RaiseResult::ShouldUWakeUp;
+                waker.wake_by_ref();
+            }
         }
 
-        return RaiseResult::ShouldIWakeUp;
+        return RaiseResult::Finished;
     }
 }
 
@@ -265,6 +285,8 @@ impl SignalList {
             inner: Spin::new(SignalListInner {
                 mask: 0,
                 pending: BinaryHeap::new(),
+                signal_waker: None,
+                stop_waker: None,
                 handlers: BTreeMap::new(),
             }),
         }
@@ -310,6 +332,13 @@ impl SignalList {
             .unwrap_or_else(SignalAction::default_action)
     }
 
+    // TODO!!!: Find a better way.
+    pub fn set_signal_waker(&self, waker: Waker) {
+        let mut inner = self.inner.lock_irq();
+        let old_waker = inner.signal_waker.replace(waker);
+        assert!(old_waker.is_none(), "We should not have a waker here");
+    }
+
     /// Clear all signals except for `SIG_IGN`.
     /// This is used when `execve` is called.
     pub fn clear_non_ignore(&self) {
@@ -356,8 +385,7 @@ impl SignalList {
                             inner.mask(handler.sa_mask as u64);
                             old_mask
                         };
-                        let result =
-                            handler.handle(signal.to_signum(), old_mask, int_stack, ext_ctx);
+                        let result = handler.handle(signal, old_mask, int_stack, ext_ctx);
                         if result.is_err() {
                             self.inner.lock_irq().set_mask(old_mask);
                         }
@@ -381,15 +409,45 @@ impl SignalList {
 
             // Default actions.
             match signal {
-                Signal::SIGSTOP => Thread::current().do_stop(Signal::SIGSTOP),
-                Signal::SIGCONT => Thread::current().do_continue(),
+                Signal::SIGSTOP | Signal::SIGTSTP | Signal::SIGTTIN | Signal::SIGTTOU => {
+                    let thread = Thread::current();
+                    if let Some(parent) = thread.process.parent.load() {
+                        parent.notify(
+                            WaitObject {
+                                pid: thread.process.pid,
+                                code: WaitType::Stopped(signal),
+                            },
+                            ProcessList::get().lock_shared().as_pos(),
+                        );
+                    }
+
+                    preempt::disable();
+
+                    // A task stopped by `SIGSTOP` can only be woken up by `SIGCONT` or `SIGKILL`.
+                    // SAFETY: Preempt disabled above.
+                    {
+                        let mut inner = self.inner.lock_irq();
+                        let waker = Waker::from(Task::current().usleep());
+                        let old_waker = inner.stop_waker.replace(waker);
+                        assert!(old_waker.is_none(), "We should not have a waker here");
+                    }
+
+                    Scheduler::schedule();
+
+                    if let Some(parent) = thread.process.parent.load() {
+                        parent.notify(
+                            WaitObject {
+                                pid: thread.process.pid,
+                                code: WaitType::Continued,
+                            },
+                            ProcessList::get().lock_shared().as_pos(),
+                        );
+                    }
+                }
+                Signal::SIGCONT => {}
                 Signal::SIGKILL => ProcessList::kill_current(signal),
                 // Ignored
-                Signal::SIGCHLD | Signal::SIGURG | Signal::SIGWINCH => continue,
-                // "Soft" stops.
-                Signal::SIGTSTP | Signal::SIGTTIN | Signal::SIGTTOU => {
-                    Thread::current().do_stop(signal)
-                }
+                Signal::SIGCHLD | Signal::SIGURG | Signal::SIGWINCH => {}
                 // TODO!!!!!!: Check exit status format.
                 s if s.is_coredump() => ProcessList::kill_current(signal),
                 signal => ProcessList::kill_current(signal),
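
The stop/continue flow now runs entirely on wakers: the default action for the stop signals parks the task with a `UniqueWaker` from `Task::current().usleep()` and stores it as `stop_waker`, while `raise` wakes that waker on `SIGCONT` and pokes the long-lived `signal_waker` for everything else. A sketch of the intended sequence from a hypothetical controlling task:

    fn stop_then_continue(target: &Arc<Thread>) {
        // Queues SIGSTOP and wakes the signal waker; the target parks
        // itself the next time it handles its pending signals.
        target.raise(Signal::SIGSTOP);

        // ... later, from another task: takes the stored stop waker and
        // wakes it, moving the target back to RUNNING and onto a run queue.
        target.raise(Signal::SIGCONT);
    }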

+ 342 - 0
src/kernel/task/task.rs

@@ -0,0 +1,342 @@
+mod context;
+mod kstack;
+mod runnable;
+
+pub use context::TaskContext;
+pub use runnable::{Contexted, PinRunnable, RunState};
+
+use atomic_unique_refcell::AtomicUniqueRefCell;
+use kstack::KernelStack;
+
+use core::{
+    future::Future,
+    pin::Pin,
+    sync::atomic::{fence, AtomicBool, AtomicU32, Ordering},
+    task::{Context, Poll, Waker},
+};
+
+use alloc::{
+    boxed::Box,
+    sync::{Arc, Weak},
+    task::Wake,
+};
+use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTreeAtomicLink};
+
+use crate::{kernel::task::Scheduler, sync::preempt, Spin};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct TaskId(u32);
+
+#[derive(Debug)]
+pub struct TaskState(AtomicU32);
+
+pub struct UniqueWaker(Arc<Task>);
+
+pub struct TaskHandle<Output: Send> {
+    /// The task itself.
+    task: Arc<Task>,
+    /// The output of the task.
+    output: Arc<Spin<TaskOutput<Output>>>,
+}
+
+pub struct TaskOutput<Output: Send> {
+    output: Option<Output>,
+    waker: Option<Waker>,
+}
+
+/// A `Task` represents a schedulable unit.
+pub struct Task {
+    /// Unique identifier of the task.
+    pub id: TaskId,
+    /// Whether the task is on some run queue.
+    pub(super) on_rq: AtomicBool,
+    /// Executor object.
+    executor: AtomicUniqueRefCell<Option<Pin<Box<dyn Future<Output = ()> + Send>>>>,
+    /// Task execution context.
+    task_context: TaskContext,
+    /// Task state.
+    state: TaskState,
+    /// Link in the global task list.
+    link_task_list: RBTreeAtomicLink,
+}
+
+intrusive_adapter!(pub TaskAdapter = Arc<Task>: Task { link_task_list: RBTreeAtomicLink });
+impl<'a> KeyAdapter<'a> for TaskAdapter {
+    type Key = TaskId;
+    fn get_key(&self, task: &'a Task) -> Self::Key {
+        task.id
+    }
+}
+
+impl Scheduler {
+    pub(super) fn extract_handle<O>(handle: TaskHandle<O>) -> (Arc<Task>, Arc<Spin<TaskOutput<O>>>)
+    where
+        O: Send,
+    {
+        let TaskHandle { task, output } = handle;
+        (task, output)
+    }
+}
+
+impl TaskState {
+    pub const RUNNING: u32 = 0;
+    pub const ISLEEP: u32 = 1;
+    pub const USLEEP: u32 = 2;
+
+    pub const fn new(state: u32) -> Self {
+        Self(AtomicU32::new(state))
+    }
+
+    pub fn swap(&self, state: u32) -> u32 {
+        self.0.swap(state, Ordering::AcqRel)
+    }
+
+    pub fn cmpxchg(&self, current: u32, new: u32) -> u32 {
+        self.0
+            .compare_exchange(current, new, Ordering::AcqRel, Ordering::Acquire)
+            .unwrap_or_else(|x| x)
+    }
+
+    pub fn is_runnable(&self) -> bool {
+        self.0.load(Ordering::Acquire) == Self::RUNNING
+    }
+}
+
+impl Task {
+    pub fn new<R, O>(runnable: R) -> TaskHandle<R::Output>
+    where
+        O: Send,
+        R: PinRunnable<Output = O> + Contexted + Send + 'static,
+    {
+        static ID: AtomicU32 = AtomicU32::new(0);
+
+        let output = Arc::new(Spin::new(TaskOutput {
+            output: None,
+            waker: None,
+        }));
+
+        let kernel_stack = KernelStack::new();
+        let mut task_context = TaskContext::new();
+        task_context.set_sp(kernel_stack.get_stack_bottom());
+
+        let mut executor = Box::pin(Executor::new(kernel_stack, runnable));
+
+        task_context.call2(
+            Self::_executor::<O, R>,
+            [
+                unsafe { executor.as_mut().get_unchecked_mut() } as *mut _ as _,
+                Weak::into_raw(Arc::downgrade(&output)) as usize,
+            ],
+        );
+
+        let task = Arc::new(Self {
+            id: TaskId(ID.fetch_add(1, Ordering::Relaxed)),
+            on_rq: AtomicBool::new(false),
+            executor: AtomicUniqueRefCell::new(Some(executor)),
+            task_context,
+            state: TaskState::new(TaskState::RUNNING),
+            link_task_list: RBTreeAtomicLink::new(),
+        });
+
+        TaskHandle { task, output }
+    }
+
+    pub fn is_runnable(&self) -> bool {
+        self.state.is_runnable()
+    }
+
+    pub(super) fn set_usleep(&self) {
+        let prev_state = self.state.swap(TaskState::USLEEP);
+        assert_eq!(prev_state, TaskState::RUNNING);
+    }
+
+    pub fn usleep(self: &Arc<Self>) -> Arc<UniqueWaker> {
+        // No need to dequeue. We have proved that the task is running, so it is not in the queue.
+        self.set_usleep();
+
+        Arc::new(UniqueWaker(self.clone()))
+    }
+
+    pub fn isleep(self: &Arc<Self>) -> Arc<Self> {
+        // No need to dequeue. We have proved that the task is running, so it is not in the queue.
+        let prev_state = self.state.swap(TaskState::ISLEEP);
+        assert_eq!(prev_state, TaskState::RUNNING);
+
+        self.clone()
+    }
+
+    pub fn switch(from: &Self, to: &Self) {
+        from.task_context.switch_to(&to.task_context);
+    }
+
+    pub fn switch_noreturn(to: &Self) -> ! {
+        to.task_context.switch_noreturn();
+    }
+
+    unsafe extern "C" fn _executor<O, R>(
+        executor: Pin<&mut Executor<R>>,
+        output: *const Spin<TaskOutput<R::Output>>,
+    ) -> !
+    where
+        O: Send,
+        R: PinRunnable<Output = O> + Send + Contexted,
+    {
+        // We get here with preempt count == 1.
+        preempt::enable();
+
+        let output = Weak::from_raw(output);
+        let executor = unsafe { executor.get_unchecked_mut() };
+        let runnable = unsafe { Pin::new_unchecked(&mut executor.runnable) };
+
+        {
+            let waker = Waker::from(Task::current().clone());
+            let output_data = runnable.pinned_join(&waker);
+
+            if let Some(output) = output.upgrade() {
+                let mut output = output.lock();
+                let old = output.output.replace(output_data);
+                debug_assert!(old.is_none(), "Output should be empty");
+
+                if let Some(waker) = output.waker.take() {
+                    waker.wake();
+                }
+            }
+        }
+
+        // SAFETY: We are on the same CPU as the task.
+        executor.finished.store(true, Ordering::Relaxed);
+
+        // Idle task needs preempt count == 1.
+        preempt::disable();
+        Task::switch_noreturn(&Task::idle());
+    }
+
+    pub fn run(&self, cx: &mut Context) {
+        let mut executor = self.executor.borrow();
+        let real_executor = executor.as_mut().expect("Executor should be present");
+
+        if let Poll::Ready(_) = real_executor.as_mut().poll(cx) {
+            executor.take();
+            self.set_usleep();
+            Self::remove(self);
+        }
+    }
+}
+
+impl Wake for Task {
+    fn wake(self: Arc<Self>) {
+        self.wake_by_ref();
+    }
+
+    fn wake_by_ref(self: &Arc<Self>) {
+        match self.state.cmpxchg(TaskState::ISLEEP, TaskState::RUNNING) {
+            TaskState::RUNNING | TaskState::USLEEP => return,
+            TaskState::ISLEEP => Scheduler::get().activate(self),
+            state => panic!("Invalid transition from state {:?} to `Running`", state),
+        }
+    }
+}
+
+impl Wake for UniqueWaker {
+    fn wake(self: Arc<Self>) {
+        self.wake_by_ref();
+    }
+
+    fn wake_by_ref(self: &Arc<Self>) {
+        let Self(task) = &**self;
+
+        let prev_state = task.state.swap(TaskState::RUNNING);
+        assert_eq!(prev_state, TaskState::USLEEP);
+
+        Scheduler::get().activate(task);
+    }
+}
+
+struct Executor<R>
+where
+    R: PinRunnable + Send + Contexted + 'static,
+{
+    _kernel_stack: KernelStack,
+    runnable: R,
+    finished: AtomicBool,
+}
+
+impl<R> Executor<R>
+where
+    R: PinRunnable + Send + Contexted + 'static,
+{
+    pub fn new(kernel_stack: KernelStack, runnable: R) -> Self {
+        Self {
+            _kernel_stack: kernel_stack,
+            runnable,
+            finished: AtomicBool::new(false),
+        }
+    }
+}
+
+impl<R> Future for Executor<R>
+where
+    R: PinRunnable + Send + Contexted + 'static,
+{
+    type Output = ();
+
+    fn poll(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Self::Output> {
+        // TODO!!!: We should load the context only if the previous task is
+        // different from the current task.
+
+        // SAFETY: We don't move the runnable object.
+        let executor = unsafe { self.get_unchecked_mut() };
+        executor.runnable.load_running_context();
+
+        // TODO!!!: If the task comes from another cpu, we need to sync.
+        //
+        // The other cpu should see the changes of kernel stack of the target thread
+        // made in this cpu.
+        //
+        // Can we find a better way other than `fence`s?
+        //
+        // An alternative way is to use an atomic variable to store the cpu id of
+        // the current task. Then we can use acquire release swap to ensure that the
+        // other cpu sees the changes.
+        fence(Ordering::SeqCst);
+
+        Task::switch(&Task::idle(), &Task::current());
+
+        fence(Ordering::SeqCst);
+
+        if executor.finished.load(Ordering::Relaxed) {
+            return Poll::Ready(());
+        }
+
+        return Poll::Pending;
+    }
+}
+
+pub struct FutureRunnable<F: Future>(F);
+
+impl<F> FutureRunnable<F>
+where
+    F: Future,
+{
+    pub const fn new(future: F) -> Self {
+        Self(future)
+    }
+}
+
+impl<F: Future + 'static> Contexted for FutureRunnable<F> {
+    fn load_running_context(&mut self) {}
+}
+
+impl<F: Future + 'static> PinRunnable for FutureRunnable<F> {
+    type Output = F::Output;
+
+    fn pinned_run(self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output> {
+        let mut future = unsafe { self.map_unchecked_mut(|me| &mut me.0) };
+        let mut context = Context::from_waker(waker);
+
+        match future.as_mut().poll(&mut context) {
+            Poll::Ready(output) => RunState::Finished(output),
+            Poll::Pending => RunState::Running,
+        }
+    }
+}
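
A sketch of spawning an async task through the new `Task` API, mirroring the call sites in `lib.rs` below; fire-and-forget only, since `JoinHandle`'s `Future` impl is not shown in this diff:

    fn spawn_hello() {
        // Wrap a future in `FutureRunnable`, allocate a `Task` (and its
        // kernel stack) for it, and hand it to the scheduler.
        let runnable = FutureRunnable::new(async {
            println_trace!("trace_future", "hello from a spawned task");
        });
        let _handle = Scheduler::get().spawn(Task::new(runnable));
    }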

+ 51 - 0
src/kernel/task/task/context.rs

@@ -0,0 +1,51 @@
+use core::{cell::UnsafeCell, mem::transmute};
+
+#[derive(Debug)]
+pub struct TaskContext(UnsafeCell<arch::TaskContext>);
+
+unsafe impl Sync for TaskContext {}
+
+impl TaskContext {
+    pub const fn new() -> Self {
+        Self(UnsafeCell::new(arch::TaskContext::new()))
+    }
+
+    pub fn set_ip(&mut self, ip: usize) {
+        let Self(context) = self;
+        context.get_mut().ip(ip);
+    }
+
+    pub fn set_sp(&mut self, sp: usize) {
+        let Self(context) = self;
+        context.get_mut().sp(sp);
+    }
+
+    pub fn set_interrupt(&mut self, is_enabled: bool) {
+        let Self(context) = self;
+        context.get_mut().interrupt(is_enabled);
+    }
+
+    pub fn call2<T, U>(&mut self, func: unsafe extern "C" fn(T, U) -> !, args: [usize; 2]) {
+        let Self(context) = self;
+        context
+            .get_mut()
+            .call2(unsafe { transmute(func as *mut ()) }, args);
+    }
+
+    pub fn switch_to(&self, to: &Self) {
+        let Self(from_ctx) = self;
+        let Self(to_ctx) = to;
+        unsafe {
+            arch::TaskContext::switch(&mut *from_ctx.get(), &mut *to_ctx.get());
+        }
+    }
+
+    pub fn switch_noreturn(&self) -> ! {
+        let mut from_ctx = arch::TaskContext::new();
+        let Self(to_ctx) = self;
+        unsafe {
+            arch::TaskContext::switch(&mut from_ctx, &mut *to_ctx.get());
+        }
+        unreachable!("We should never return from switch_noreturn");
+    }
+}
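
`call2` is how `Task::new` arranges for `_executor` to run on a task's first switch-in: the fresh context gets a stack pointer plus a two-argument entry function. A minimal sketch with a hypothetical `entry`; the backing `KernelStack` must outlive the context, which is why `Executor` owns it:

    unsafe extern "C" fn entry(_a: usize, _b: usize) -> ! {
        loop {} // hypothetical first function of the task; never returns
    }

    fn prepare(stack: &KernelStack) -> TaskContext {
        let mut ctx = TaskContext::new();
        ctx.set_sp(stack.get_stack_bottom()); // stack grows down from here
        ctx.call2(entry, [0, 0]); // entry(0, 0) runs on the first switch_to
        ctx
    }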

+ 27 - 0
src/kernel/task/task/kstack.rs

@@ -0,0 +1,27 @@
+use crate::kernel::mem::{paging::Page, phys::PhysPtr};
+
+#[derive(Debug)]
+pub struct KernelStack {
+    _pages: Page,
+    bottom: usize,
+}
+
+impl KernelStack {
+    /// Kernel stack page order
+    /// 7 for `2^7 = 128 pages = 512 KiB`
+    const KERNEL_STACK_ORDER: u32 = 7;
+
+    pub fn new() -> Self {
+        let pages = Page::alloc_many(Self::KERNEL_STACK_ORDER);
+        let bottom = pages.as_cached().offset(pages.len()).as_ptr::<u8>() as usize;
+
+        Self {
+            _pages: pages,
+            bottom,
+        }
+    }
+
+    pub fn get_stack_bottom(&self) -> usize {
+        self.bottom
+    }
+}
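
For scale: order 7 means 2^7 = 128 contiguous pages, so with the usual 4 KiB page size (an assumption here) each kernel stack reserves 512 KiB, and `bottom` points one byte past the highest address because x86-64 stacks grow downwards:

    const PAGE_SIZE: usize = 4096; // assumed page size
    const KERNEL_STACK_SIZE: usize = PAGE_SIZE << 7; // 128 pages = 512 KiB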

+ 53 - 0
src/kernel/task/task/runnable.rs

@@ -0,0 +1,53 @@
+use core::{pin::Pin, task::Waker};
+
+pub enum RunState<Output> {
+    Running,
+    Finished(Output),
+}
+
+pub trait Contexted {
+    /// # Safety
+    /// This function will be called in a preemption disabled context.
+    fn load_running_context(&mut self);
+}
+
+pub trait Runnable {
+    type Output;
+
+    fn run(&mut self, waker: &Waker) -> RunState<Self::Output>;
+
+    fn join(&mut self, waker: &Waker) -> Self::Output {
+        loop {
+            match self.run(waker) {
+                RunState::Running => continue,
+                RunState::Finished(output) => break output,
+            }
+        }
+    }
+}
+
+pub trait PinRunnable {
+    type Output;
+
+    fn pinned_run(self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output>;
+
+    fn pinned_join(mut self: Pin<&mut Self>, waker: &Waker) -> Self::Output {
+        loop {
+            match self.as_mut().pinned_run(waker) {
+                RunState::Running => continue,
+                RunState::Finished(output) => break output,
+            }
+        }
+    }
+}
+
+impl<R> Runnable for R
+where
+    R: PinRunnable + Unpin,
+{
+    type Output = R::Output;
+
+    fn run(&mut self, waker: &Waker) -> RunState<Self::Output> {
+        Pin::new(self).pinned_run(waker)
+    }
+}
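
A trivial `PinRunnable` for illustration; because it is `Unpin`, the blanket impl above gives it `Runnable` for free:

    struct Once; // sketch: finishes on its first run

    impl Contexted for Once {
        fn load_running_context(&mut self) {}
    }

    impl PinRunnable for Once {
        type Output = u32;

        fn pinned_run(self: Pin<&mut Self>, _waker: &Waker) -> RunState<u32> {
            RunState::Finished(42)
        }
    }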

+ 228 - 183
src/kernel/task/thread.rs

@@ -1,34 +1,43 @@
-use core::{
-    arch::naked_asm,
-    cell::{RefCell, UnsafeCell},
-    sync::atomic::AtomicBool,
-};
+use core::{arch::asm, pin::Pin, ptr::NonNull, task::Waker};
 
 use crate::{
-    kernel::{cpu::current_cpu, user::dataflow::CheckedUserPointer, vfs::FsContext},
+    kernel::{
+        cpu::current_cpu,
+        mem::VAddr,
+        user::dataflow::CheckedUserPointer,
+        vfs::{filearray::FileArray, FsContext},
+    },
     prelude::*,
-    sync::{preempt, AsRefMutPosition as _, AsRefPosition as _},
+    sync::{preempt, AsRefMutPosition as _},
 };
 
 use alloc::sync::Arc;
 
-use crate::kernel::vfs::filearray::FileArray;
-
 use super::{
     signal::{RaiseResult, Signal, SignalList},
-    KernelStack, Process, ProcessList, Scheduler, WaitObject, WaitType,
+    task::{Contexted, PinRunnable, RunState},
+    Process, ProcessList, TaskContext,
 };
 
-use arch::{InterruptContext, TaskContext, UserTLS};
+use arch::{InterruptContext, UserTLS, _arch_fork_return};
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum ThreadState {
-    Preparing,
-    Running,
-    Ready,
-    Zombie,
-    ISleep,
-    USleep,
+struct CurrentThread {
+    thread: NonNull<Thread>,
+    runnable: NonNull<ThreadRunnable>,
+}
+
+#[arch::define_percpu]
+static CURRENT_THREAD: Option<CurrentThread> = None;
+
+pub struct ThreadBuilder {
+    tid: Option<u32>,
+    name: Option<Arc<[u8]>>,
+    process: Option<Arc<Process>>,
+    files: Option<Arc<FileArray>>,
+    fs_context: Option<Arc<FsContext>>,
+    signal_list: Option<SignalList>,
+    tls: Option<UserTLS>,
+    set_child_tid: Option<usize>,
 }
 
 #[derive(Debug)]
@@ -52,22 +61,6 @@ pub struct Thread {
     pub fs_context: Arc<FsContext>,
 
     pub signal_list: SignalList,
-
-    /// Thread state for scheduler use.
-    pub state: Spin<ThreadState>,
-
-    pub oncpu: AtomicBool,
-
-    /// Thread context
-    pub context: UnsafeCell<TaskContext>,
-
-    /// Kernel stack
-    /// Never access this directly.
-    ///
-    /// We can only touch kernel stack when the process is neither running nor sleeping.
-    /// AKA, the process is in the ready queue and will return to `schedule` context.
-    kstack: RefCell<KernelStack>,
-
     inner: Spin<ThreadInner>,
 }
 
@@ -111,113 +104,127 @@ impl UserDescriptorFlags {
     }
 }
 
-impl Thread {
-    pub unsafe fn new_for_init(
-        name: Arc<[u8]>,
-        tid: u32,
-        process: &Arc<Process>,
-        procs: &mut ProcessList,
-    ) -> Arc<Self> {
-        let thread = Arc::new(Self {
-            tid,
-            process: process.clone(),
-            files: FileArray::new_for_init(),
-            fs_context: FsContext::new_for_init(),
-            signal_list: SignalList::new(),
-            kstack: RefCell::new(KernelStack::new()),
-            context: UnsafeCell::new(TaskContext::new()),
-            state: Spin::new(ThreadState::Preparing),
-            oncpu: AtomicBool::new(false),
-            inner: Spin::new(ThreadInner {
-                name,
-                tls: None,
-                set_child_tid: 0,
-            }),
-        });
+impl ThreadBuilder {
+    pub fn new() -> Self {
+        Self {
+            tid: None,
+            name: None,
+            process: None,
+            files: None,
+            fs_context: None,
+            signal_list: None,
+            tls: None,
+            set_child_tid: None,
+        }
+    }
 
-        process.add_thread(&thread, procs.as_pos_mut());
-        thread
+    pub fn tid(mut self, tid: u32) -> Self {
+        self.tid = Some(tid);
+        self
+    }
+
+    pub fn name(mut self, name: Arc<[u8]>) -> Self {
+        self.name = Some(name);
+        self
     }
 
-    pub fn new_cloned(&self, procs: &mut ProcessList) -> Arc<Self> {
-        let process = Process::new_cloned(&self.process, procs);
+    pub fn process(mut self, process: Arc<Process>) -> Self {
+        self.process = Some(process);
+        self
+    }
 
-        let state = self.state.lock();
-        let inner = self.inner.lock();
-        assert!(matches!(*state, ThreadState::Running));
+    pub fn files(mut self, files: Arc<FileArray>) -> Self {
+        self.files = Some(files);
+        self
+    }
+
+    pub fn fs_context(mut self, fs_context: Arc<FsContext>) -> Self {
+        self.fs_context = Some(fs_context);
+        self
+    }
+
+    pub fn signal_list(mut self, signal_list: SignalList) -> Self {
+        self.signal_list = Some(signal_list);
+        self
+    }
+
+    pub fn tls(mut self, tls: Option<UserTLS>) -> Self {
+        self.tls = tls;
+        self
+    }
+
+    pub fn set_child_tid(mut self, set_child_tid: usize) -> Self {
+        self.set_child_tid = Some(set_child_tid);
+        self
+    }
+
+    /// Fork the thread from another thread.
+    ///
+    /// Sets the thread's files, fs_context, signal_list, name, tls, and set_child_tid from `thread`.
+    pub fn fork_from(self, thread: &Thread) -> Self {
+        let inner = thread.inner.lock();
+
+        self.files(FileArray::new_cloned(&thread.files))
+            .fs_context(FsContext::new_cloned(&thread.fs_context))
+            .signal_list(thread.signal_list.clone())
+            .name(inner.name.clone())
+            .tls(inner.tls.clone())
+            .set_child_tid(inner.set_child_tid)
+    }
+
+    pub fn build(self, process_list: &mut ProcessList) -> Arc<Thread> {
+        let tid = self.tid.expect("TID is not set");
+        let name = self.name.expect("Name is not set");
+        let process = self.process.expect("Process is not set");
+        let files = self.files.unwrap_or_else(|| FileArray::new());
+        let fs_context = self
+            .fs_context
+            .unwrap_or_else(|| FsContext::global().clone());
+        let signal_list = self.signal_list.unwrap_or_else(|| SignalList::new());
+        let set_child_tid = self.set_child_tid.unwrap_or(0);
 
-        let signal_list = self.signal_list.clone();
         signal_list.clear_pending();
 
-        let thread = Arc::new(Self {
-            tid: process.pid,
+        let thread = Arc::new(Thread {
+            tid,
             process: process.clone(),
-            files: FileArray::new_cloned(&self.files),
-            fs_context: FsContext::new_cloned(&self.fs_context),
+            files,
+            fs_context,
             signal_list,
-            kstack: RefCell::new(KernelStack::new()),
-            context: UnsafeCell::new(TaskContext::new()),
-            state: Spin::new(ThreadState::Preparing),
-            oncpu: AtomicBool::new(false),
             inner: Spin::new(ThreadInner {
-                name: inner.name.clone(),
-                tls: inner.tls.clone(),
-                set_child_tid: inner.set_child_tid,
+                name,
+                tls: self.tls,
+                set_child_tid,
             }),
         });
 
-        procs.add_thread(&thread);
-        process.add_thread(&thread, procs.as_pos_mut());
+        process_list.add_thread(&thread);
+        process.add_thread(&thread, process_list.as_pos_mut());
         thread
     }
+}
 
+impl Thread {
     pub fn current<'lt>() -> BorrowedArc<'lt, Self> {
-        Scheduler::current()
+        // SAFETY: We won't change the thread pointer in the current CPU when
+        // we return here after some preemption.
+        let current: &Option<CurrentThread> = unsafe { CURRENT_THREAD.as_ref() };
+        let current = current.as_ref().expect("Current thread is not set");
+        BorrowedArc::from_raw(current.thread.as_ptr())
     }
 
-    pub fn do_stop(self: &Arc<Self>, signal: Signal) {
-        if let Some(parent) = self.process.parent.load() {
-            parent.notify(
-                WaitObject {
-                    pid: self.process.pid,
-                    code: WaitType::Stopped(signal),
-                },
-                ProcessList::get().lock_shared().as_pos(),
-            );
-        }
+    pub fn runnable<'lt>() -> &'lt ThreadRunnable {
+        // SAFETY: We won't change the thread pointer in the current CPU when
+        // we return here after some preemption.
+        let current: &Option<CurrentThread> = unsafe { CURRENT_THREAD.as_ref() };
+        let current = current.as_ref().expect("Current thread is not set");
 
-        preempt::disable();
-
-        // `SIGSTOP` can only be waken up by `SIGCONT` or `SIGKILL`.
-        // SAFETY: Preempt disabled above.
-        Scheduler::get().lock().usleep(self);
-        Scheduler::schedule();
+        // SAFETY: We can only use the returned value when we are in the context of the thread.
+        unsafe { &*current.runnable.as_ptr() }
     }
 
-    pub fn do_continue(self: &Arc<Self>) {
-        if let Some(parent) = self.process.parent.load() {
-            parent.notify(
-                WaitObject {
-                    pid: self.process.pid,
-                    code: WaitType::Continued,
-                },
-                ProcessList::get().lock_shared().as_pos(),
-            );
-        }
-    }
-
-    pub fn raise(self: &Arc<Thread>, signal: Signal) -> RaiseResult {
-        match self.signal_list.raise(signal) {
-            RaiseResult::ShouldIWakeUp => {
-                Scheduler::get().lock_irq().iwake(self);
-                RaiseResult::Finished
-            }
-            RaiseResult::ShouldUWakeUp => {
-                Scheduler::get().lock_irq().uwake(self);
-                RaiseResult::Finished
-            }
-            result => result,
-        }
+    pub fn raise(self: &Arc<Self>, signal: Signal) -> RaiseResult {
+        self.signal_list.raise(signal)
     }
 
     /// # Safety
@@ -255,84 +262,122 @@ impl Thread {
         Ok(())
     }
 
-    pub fn fork_init(&self, interrupt_context: InterruptContext) {
-        let mut state = self.state.lock();
-        *state = ThreadState::USleep;
+    pub fn set_name(&self, name: Arc<[u8]>) {
+        self.inner.lock().name = name;
+    }
 
-        let sp = self.kstack.borrow().init(interrupt_context);
-        unsafe {
-            self.get_context_mut_ptr()
-                .as_mut()
-                .unwrap()
-                .init(fork_return as usize, sp);
+    pub fn get_name(&self) -> Arc<[u8]> {
+        self.inner.lock().name.clone()
+    }
+}
+
+pub struct ThreadRunnable {
+    thread: Arc<Thread>,
+    /// Interrupt context for the thread initialization.
+    /// We store the kernel stack pointer in one of the fields for now.
+    ///
+    /// TODO: A better way to store the interrupt context.
+    interrupt_context: InterruptContext,
+    return_context: TaskContext,
+}
+
+impl ThreadRunnable {
+    pub fn new(thread: Arc<Thread>, entry: VAddr, stack_pointer: VAddr) -> Self {
+        let (VAddr(entry), VAddr(stack_pointer)) = (entry, stack_pointer);
+
+        let mut interrupt_context = InterruptContext::default();
+        interrupt_context.set_return_address(entry as _, true);
+        interrupt_context.set_stack_pointer(stack_pointer as _, true);
+        interrupt_context.set_interrupt_enabled(true);
+
+        Self {
+            thread,
+            interrupt_context,
+            return_context: TaskContext::new(),
         }
     }
 
-    pub fn init(&self, entry: usize) {
-        let mut state = self.state.lock();
-        *state = ThreadState::USleep;
-        unsafe {
-            self.get_context_mut_ptr()
-                .as_mut()
-                .unwrap()
-                .init(entry, self.get_kstack_bottom());
+    pub fn from_context(thread: Arc<Thread>, interrupt_context: InterruptContext) -> Self {
+        Self {
+            thread,
+            interrupt_context,
+            return_context: TaskContext::new(),
         }
     }
 
     /// # Safety
-    /// This function is unsafe because it accesses the `current_cpu()`, which needs
-    /// to be called in a preemption disabled context.
-    pub unsafe fn load_interrupt_stack(&self) {
-        self.kstack.borrow().load_interrupt_stack();
+    /// This function needs to be called with preempt count == 1.
+    pub unsafe fn exit(&self) -> ! {
+        self.return_context.switch_noreturn();
     }
+}
 
-    pub fn get_kstack_bottom(&self) -> usize {
-        self.kstack.borrow().get_stack_bottom()
-    }
+impl Contexted for ThreadRunnable {
+    fn load_running_context(&mut self) {
+        let thread = self.thread.as_ref();
 
-    pub unsafe fn get_context_mut_ptr(&self) -> *mut TaskContext {
-        self.context.get()
-    }
+        unsafe {
+            // SAFETY: Preemption is disabled.
+            arch::load_interrupt_stack(current_cpu(), self.interrupt_context.int_no as u64);
+        }
 
-    pub fn set_name(&self, name: Arc<[u8]>) {
-        self.inner.lock().name = name;
-    }
+        // SAFETY: Preemption is disabled.
+        unsafe {
+            // SAFETY: `self` and `thread` are valid and non-null.
+            let current_thread = CurrentThread {
+                thread: NonNull::new_unchecked(thread as *const _ as *mut _),
+                runnable: NonNull::new_unchecked(self as *const _ as *mut _),
+            };
 
-    pub fn get_name(&self) -> Arc<[u8]> {
-        self.inner.lock().name.clone()
+            // SAFETY: Preemption is disabled.
+            CURRENT_THREAD.swap(Some(current_thread));
+        }
+
+        thread.process.mm_list.switch_page_table();
+
+        unsafe {
+            // SAFETY: Preemption is disabled.
+            thread.load_thread_area32();
+        }
     }
 }
 
-#[naked]
-unsafe extern "C" fn fork_return() {
-    // We don't land on the typical `Scheduler::schedule()` function, so we need to
-    // manually enable preemption.
-    naked_asm! {
-        "
-        call {preempt_enable}
-        swapgs
-        pop %rax
-        pop %rbx
-        pop %rcx
-        pop %rdx
-        pop %rdi
-        pop %rsi
-        pop %r8
-        pop %r9
-        pop %r10
-        pop %r11
-        pop %r12
-        pop %r13
-        pop %r14
-        pop %r15
-        pop %rbp
-        add $16, %rsp
-        iretq
-        ",
-        preempt_enable = sym preempt::enable,
-        options(att_syntax),
+impl PinRunnable for ThreadRunnable {
+    type Output = ();
+
+    fn pinned_run(mut self: Pin<&mut Self>, waker: &Waker) -> RunState<Self::Output> {
+        let mut task_context = TaskContext::new();
+        task_context.set_interrupt(false);
+        task_context.set_ip(_arch_fork_return as _);
+        task_context.set_sp(&mut self.interrupt_context as *mut _ as _);
+
+        self.thread.signal_list.set_signal_waker(waker.clone());
+
+        preempt::disable();
+
+        // TODO!!!!!: CHANGE THIS
+        unsafe {
+            asm!(
+                "mov %rsp, {0}",
+                out(reg) self.interrupt_context.int_no,
+                options(nomem, preserves_flags, att_syntax),
+            );
+            self.interrupt_context.int_no -= 512;
+            self.interrupt_context.int_no &= !0xf;
+        };
+
+        unsafe {
+            // SAFETY: Preemption is disabled.
+            arch::load_interrupt_stack(current_cpu(), self.interrupt_context.int_no as u64);
+        }
+
+        preempt::enable();
+
+        self.return_context.switch_to(&task_context);
+
+        // We return here with preempt count == 1.
+        preempt::enable();
+
+        RunState::Finished(())
     }
 }
-
-// TODO: Maybe we can find a better way instead of using `RefCell` for `KernelStack`?
-unsafe impl Sync for Thread {}
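
A sketch of the fork path with the new builder; `new_tid` and `child_process` stand in for values the real caller obtains elsewhere:

    fn fork_thread(
        current: &Thread,
        new_tid: u32,
        child_process: Arc<Process>,
        procs: &mut ProcessList,
    ) -> Arc<Thread> {
        ThreadBuilder::new()
            .tid(new_tid)
            .process(child_process)
            // Clones files, fs_context, signal list, name, tls and
            // set_child_tid from the parent thread.
            .fork_from(current)
            .build(procs)
    }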

+ 1 - 1
src/kernel/vfs/filearray.rs

@@ -61,7 +61,7 @@ impl FileArray {
         &Thread::current().borrow().files
     }
 
-    pub fn new_for_init() -> Arc<Self> {
+    pub fn new() -> Arc<Self> {
         Arc::new(FileArray {
             inner: Spin::new(FileArrayInner {
                 files: BTreeMap::new(),

+ 12 - 6
src/kernel/vfs/mod.rs

@@ -7,6 +7,8 @@ use inode::Mode;
 
 use super::task::Thread;
 
+use lazy_static::lazy_static;
+
 pub mod dentry;
 pub mod file;
 pub mod filearray;
@@ -50,17 +52,21 @@ pub struct FsContext {
     pub umask: Spin<Mode>,
 }
 
+lazy_static! {
+    static ref GLOBAL_FS_CONTEXT: Arc<FsContext> = Arc::new(FsContext {
+        fsroot: Dentry::kernel_root_dentry(),
+        cwd: Spin::new(Dentry::kernel_root_dentry()),
+        umask: Spin::new(0o022),
+    });
+}
+
 impl FsContext {
     pub fn get_current<'lt>() -> &'lt Arc<Self> {
         &Thread::current().borrow().fs_context
     }
 
-    pub fn new_for_init() -> Arc<Self> {
-        Arc::new(FsContext {
-            fsroot: Dentry::kernel_root_dentry(),
-            cwd: Spin::new(Dentry::kernel_root_dentry()),
-            umask: Spin::new(0o022),
-        })
+    pub fn global() -> &'static Arc<Self> {
+        &GLOBAL_FS_CONTEXT
     }
 
     pub fn new_cloned(other: &Self) -> Arc<Self> {

+ 31 - 65
src/lib.rs

@@ -25,15 +25,15 @@ mod prelude;
 mod rcu;
 mod sync;
 
-use alloc::ffi::CString;
-use core::{
-    alloc::{GlobalAlloc, Layout},
-    arch::{asm, global_asm},
-};
+use alloc::{ffi::CString, sync::Arc};
+use core::alloc::{GlobalAlloc, Layout};
 use elf::ParsedElf32;
 use kernel::{
     cpu::init_thiscpu,
-    task::{init_multitasking, Scheduler, Thread},
+    mem::Page,
+    task::{
+        FutureRunnable, ProcessBuilder, ProcessList, Scheduler, Task, ThreadBuilder, ThreadRunnable,
+    },
     vfs::{
         dentry::Dentry,
         mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY},
@@ -92,20 +92,7 @@ unsafe impl GlobalAlloc for Allocator {
 #[global_allocator]
 static ALLOCATOR: Allocator = Allocator {};
 
-global_asm!(
-    r"
-    .globl to_init_process
-    to_init_process:
-        push %rbp
-        mov %rbx, %rdi
-        jmp {}
-    ",
-    sym init_process,
-    options(att_syntax)
-);
-
 extern "C" {
-    fn to_init_process();
     fn init_allocator();
 }
 
@@ -129,29 +116,16 @@ pub extern "C" fn rust_kinit(early_kstack_pfn: usize) -> ! {
 
     // We need root dentry to be present in constructor of `FsContext`.
     // So call `init_vfs` first, then `init_multitasking`.
-    unsafe { init_multitasking(init_process) };
-
-    let mut unuse_ctx = arch::TaskContext::new();
-    // TODO: Temporary solution: we will never access this later on.
-    unuse_ctx.init(
-        to_init_process as usize,
-        early_kstack_pfn + 0x1000 + 0xffffff0000000000,
-    );
-    unsafe {
-        arch::TaskContext::switch_to(
-            &mut unuse_ctx, // We will never come back
-            &mut *Scheduler::idle_task().get_context_mut_ptr(),
-        );
-    }
+    Scheduler::init_scheduler_thiscpu();
 
-    arch::freeze()
+    let runnable = FutureRunnable::new(init_process(early_kstack_pfn));
+    Scheduler::get().spawn(Task::new(runnable));
+
+    Task::switch_noreturn(&Task::idle());
 }
 
-/// We enter this function with `preempt count == 0`
-extern "C" fn init_process(/* early_kstack_pfn: usize */) {
-    // TODO!!! Should free pass eraly_kstack_pfn and free !!!
-    // unsafe { Page::take_pfn(early_kstack_pfn, 9) };
-    preempt::enable();
+async fn init_process(early_kstack_pfn: usize) {
+    unsafe { Page::take_pfn(early_kstack_pfn, 9) };
 
     kernel::syscall::register_syscalls();
     CharDevice::init().unwrap();
@@ -169,8 +143,8 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
 
     let (ip, sp, mm_list) = {
         // mount fat32 /mnt directory
-        let fs_context = FsContext::get_current();
-        let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
+        let fs_context = FsContext::global();
+        let mnt_dir = Dentry::open(fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
 
         mnt_dir.mkdir(0o755).unwrap();
 
@@ -183,7 +157,7 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
         )
         .unwrap();
 
-        let init = Dentry::open(&fs_context, Path::new(b"/mnt/busybox").unwrap(), true)
+        let init = Dentry::open(fs_context, Path::new(b"/mnt/busybox").unwrap(), true)
             .expect("busybox should be present in /mnt");
 
         let argv = vec![
@@ -203,27 +177,19 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
         elf.load(argv, envp).unwrap()
     };
 
-    Thread::current().process.mm_list.replace(mm_list);
-    Thread::current().files.open_console();
-
-    unsafe {
-        asm!(
-            "swapgs",
-            "mov ${ds}, %rax",
-            "mov %ax, %ds",
-            "mov %ax, %es",
-            "push ${ds}",
-            "push {sp}",
-            "push $0x200",
-            "push ${cs}",
-            "push {ip}",
-            "iretq",
-            ds = const 0x33,
-            cs = const 0x2b,
-            in("rax") 0,
-            ip = in(reg) ip.0,
-            sp = in(reg) sp.0,
-            options(att_syntax, noreturn),
-        );
-    }
+    let thread_builder = ThreadBuilder::new().name(Arc::from(*b"busybox"));
+
+    let mut process_list = ProcessList::get().lock();
+    let (thread, process) = ProcessBuilder::new()
+        .mm_list(mm_list)
+        .thread_builder(thread_builder)
+        .build(&mut process_list);
+
+    process_list.set_init_process(process);
+
+    // TODO!!!: Remove this.
+    thread.files.open_console();
+
+    let task = Task::new(ThreadRunnable::new(thread, ip, sp));
+    Scheduler::get().spawn(task);
 }

+ 1 - 1
src/prelude.rs

@@ -19,7 +19,7 @@ pub use crate::bindings::root as bindings;
 
 #[allow(unused_imports)]
 pub(crate) use crate::kernel::console::{
-    print, println, println_debug, println_fatal, println_info, println_warn,
+    print, println, println_debug, println_fatal, println_info, println_trace, println_warn,
 };
 
 #[allow(unused_imports)]

+ 33 - 35
src/sync/condvar.rs

@@ -1,14 +1,19 @@
+use core::task::Waker;
+
 use crate::{
-    kernel::task::{Scheduler, Thread},
+    kernel::{
+        console::println_trace,
+        task::{Scheduler, Task},
+    },
     prelude::*,
     sync::preempt,
 };
 
 use super::{lock::Guard, strategy::LockStrategy};
-use alloc::{collections::vec_deque::VecDeque, sync::Arc};
+use alloc::collections::vec_deque::VecDeque;
 
 pub struct CondVar<const INTERRUPTIBLE: bool> {
-    waiters: Spin<VecDeque<Arc<Thread>>>,
+    waiters: Spin<VecDeque<Waker>>,
 }
 
 impl<const I: bool> core::fmt::Debug for CondVar<I> {
@@ -28,53 +33,48 @@ impl<const I: bool> CondVar<I> {
         }
     }
 
-    fn wake(schedule: &mut Scheduler, thread: &Arc<Thread>) {
-        if I {
-            schedule.iwake(thread);
-        } else {
-            schedule.uwake(thread);
-        }
+    fn wake(waker: Waker) {
+        println_trace!("trace_condvar", "tid({}) is trying to wake", thread.tid);
+        waker.wake();
+        println_trace!("trace_condvar", "tid({}) is awake", thread.tid);
     }
 
-    fn sleep(scheduler: &mut Scheduler) {
-        if I {
-            scheduler.isleep(&Thread::current());
+    fn sleep() -> Waker {
+        let task = Task::current();
+
+        println_trace!("trace_condvar", "tid({}) is trying to sleep", task.id);
+
+        let waker = if I {
+            Waker::from(task.isleep())
         } else {
-            scheduler.usleep(&Thread::current());
-        }
+            Waker::from(task.usleep())
+        };
+
+        println_trace!("trace_condvar", "tid({}) is sleeping", task.id);
+
+        waker
     }
 
     pub fn notify_one(&self) {
-        let mut scheduler = Scheduler::get().lock_irq();
-        if let Some(waiter) = self.waiters.lock().pop_front() {
-            Self::wake(scheduler.as_mut(), &waiter);
+        if let Some(waker) = self.waiters.lock().pop_front() {
+            Self::wake(waker);
         }
     }
 
     pub fn notify_all(&self) {
-        let mut scheduler = Scheduler::get().lock_irq();
-        self.waiters.lock().retain(|waiter| {
-            Self::wake(scheduler.as_mut(), &waiter);
-            false
-        });
+        for waker in self.waiters.lock().drain(..) {
+            Self::wake(waker);
+        }
     }
 
     /// Unlock the `guard`. Then wait until being waken up. Relock the `guard` before returning.
     ///
     /// # Might Sleep
     /// This function **might sleep**, so call it in a preemptible context.
-    ///
-    /// # Return
-    /// - `true`: a pending signal was received
     pub fn wait<'a, T, S: LockStrategy, const W: bool>(&self, guard: &mut Guard<'a, T, S, W>) {
         preempt::disable();
-        {
-            let mut scheduler = Scheduler::get().lock_irq();
-            // We have scheduler locked and IRQ disabled. So no one could be waking us up for now.
-
-            self.waiters.lock().push_back(Thread::current().clone());
-            Self::sleep(scheduler.as_mut());
-        }
+        let waker = Self::sleep();
+        self.waiters.lock().push_back(waker);
 
         // TODO!!!: Another way to do this:
         //
@@ -86,8 +86,6 @@ impl<const I: bool> CondVar<I> {
         Scheduler::schedule();
         unsafe { guard.force_relock() };
 
-        self.waiters
-            .lock_irq()
-            .retain(|waiter| waiter.tid != Thread::current().tid);
+        assert!(Task::current().is_runnable());
     }
 }
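
The usual usage pattern re-checks its predicate after every wakeup, since a notify can race with another waiter taking the lock first. A sketch, assuming `Spin::lock` yields the `Guard` type that `wait` expects:

    fn pop_blocking<T>(queue: &Spin<VecDeque<T>>, nonempty: &CondVar<true>) -> T {
        let mut guard = queue.lock();
        loop {
            if let Some(item) = guard.pop_front() {
                return item;
            }
            // Unlocks `queue`, sleeps interruptibly until notified,
            // then re-locks before returning.
            nonempty.wait(&mut guard);
        }
    }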