Browse source

Merge remote-tracking branch 'upstream/master' into riscv64-support

greatbridf 7 months ago
parent
commit
ac9a6b2f1f

+ 1 - 1
crates/eonix_hal/eonix_hal_traits/src/trap.rs

@@ -37,7 +37,7 @@ pub trait TrapReturn {
     /// context before the trap is valid, that is, that the stack pointer
     /// points to a valid stack frame and the program counter points to some
     /// valid instruction.
-    unsafe fn trap_return(&mut self, task_ctx: &mut Self::TaskContext);
+    unsafe fn trap_return(&mut self);
 }
 
 pub trait IrqState {

+ 3 - 4
crates/eonix_hal/src/arch/riscv64/cpu.rs

@@ -18,7 +18,7 @@ static LOCAL_CPU: LazyLock<CPU> = LazyLock::new(CPU::new);
 
 #[derive(Debug, Clone)]
 pub enum UserTLS {
-    Base(u32),
+    Base(u64),
 }
 
 /// RISC-V Hart
@@ -28,9 +28,8 @@ pub struct CPU {
 }
 
 impl UserTLS {
-    #[allow(unused_variables)]
-    pub fn new32(base: u32, _limit: u32, _is_limit_in_pages: bool) -> (Self, u32) {
-        (Self::Base(base), 0)
+    pub fn new(base: u64) -> Self {
+        Self::Base(base)
     }
 }
 

+ 9 - 8
crates/eonix_hal/src/arch/riscv64/mm.rs

@@ -112,13 +112,14 @@ impl RawAttribute for PageAttribute64 {
     fn as_table_attr(self) -> Option<TableAttribute> {
         let mut table_attr = TableAttribute::empty();
 
-        if self.0 & (PA_R | PA_W | PA_X) != 0 {
-            panic!("Encountered a huge page while parsing table attributes");
-        }
-
         if self.0 & PA_V != 0 {
             table_attr |= TableAttribute::PRESENT;
         }
+
+        if table_attr.contains(TableAttribute::PRESENT) && self.0 & (PA_R | PA_W | PA_X) != 0 {
+            return None;
+        }
+
         if self.0 & PA_G != 0 {
             table_attr |= TableAttribute::GLOBAL;
         }
@@ -135,14 +136,14 @@ impl RawAttribute for PageAttribute64 {
     fn as_page_attr(self) -> Option<PageAttribute> {
         let mut page_attr = PageAttribute::empty();
 
-        if self.0 & (PA_R | PA_W | PA_X) == 0 {
-            panic!("Invalid page attribute combination");
-        }
-
         if self.0 & PA_V != 0 {
             page_attr |= PageAttribute::PRESENT;
         }
 
+        if page_attr.contains(PageAttribute::PRESENT) && (self.0 & (PA_R | PA_W | PA_X) == 0) {
+            return None;
+        }
+
         if self.0 & PA_R != 0 {
             page_attr |= PageAttribute::READ;
         }
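
The hunks above replace the panics in as_table_attr/as_page_attr with None returns, so a leaf entry found where a table pointer was expected (or the reverse) is reported to the caller instead of crashing. A minimal host-side sketch of the same decision, using the standard RISC-V PTE bit positions for the PA_* flags and a string in place of the real attribute types (not the eonix_mm API):

    // Standard RISC-V PTE bits, matching the PA_* constants used above.
    const PA_V: u64 = 1 << 0;
    const PA_R: u64 = 1 << 1;
    const PA_W: u64 = 1 << 2;
    const PA_X: u64 = 1 << 3;

    /// Mirrors `as_table_attr`: a present entry with any of R/W/X set is a
    /// leaf (huge) page, not a pointer to the next-level table.
    fn classify(raw: u64) -> Option<&'static str> {
        let present = raw & PA_V != 0;
        if present && raw & (PA_R | PA_W | PA_X) != 0 {
            return None; // leaf entry: the caller should use `as_page_attr`
        }
        Some(if present { "present table" } else { "non-present table" })
    }

    fn main() {
        assert_eq!(classify(PA_V), Some("present table"));
        assert_eq!(classify(PA_V | PA_R | PA_X), None); // huge page, no panic
        println!("ok");
    }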

+ 58 - 17
crates/eonix_hal/src/arch/riscv64/trap/mod.rs

@@ -31,7 +31,6 @@ pub struct TrapScratch {
     kernel_tp: Option<NonZero<u64>>,
     trap_context: Option<NonNull<TrapContext>>,
     handler: unsafe extern "C" fn(),
-    captured_context: Option<NonNull<TaskContext>>,
     capturer_context: TaskContext,
 }
 
@@ -42,10 +41,11 @@ pub(crate) static TRAP_SCRATCH: TrapScratch = TrapScratch {
     kernel_tp: None,
     trap_context: None,
     handler: default_trap_handler,
-    captured_context: None,
     capturer_context: TaskContext::new(),
 };
 
+static mut DIRTY_TASK_CONTEXT: TaskContext = TaskContext::new();
+
 #[unsafe(naked)]
 unsafe extern "C" fn _raw_trap_entry() -> ! {
     naked_asm!(
@@ -93,6 +93,18 @@ unsafe extern "C" fn _raw_trap_entry() -> ! {
         "sd    t4, {t4}(t1)",
         "sd    t5, {t5}(t1)",
         "sd    t6, {t6}(t1)",
+        "sd    s0, {s0}(t1)",
+        "sd    s1, {s1}(t1)",
+        "sd    s2, {s2}(t1)",
+        "sd    s3, {s3}(t1)",
+        "sd    s4, {s4}(t1)",
+        "sd    s5, {s5}(t1)",
+        "sd    s6, {s6}(t1)",
+        "sd    s7, {s7}(t1)",
+        "sd    s8, {s8}(t1)",
+        "sd    s9, {s9}(t1)",
+        "sd    s10, {s10}(t1)",
+        "sd    s11, {s11}(t1)",
         "csrr  t2, sstatus",
         "csrr  t3, sepc",
         "csrr  t4, scause",
@@ -120,6 +132,18 @@ unsafe extern "C" fn _raw_trap_entry() -> ! {
         t4 = const Registers::OFFSET_T4,
         t5 = const Registers::OFFSET_T5,
         t6 = const Registers::OFFSET_T6,
+        s0 = const Registers::OFFSET_S0,
+        s1 = const Registers::OFFSET_S1,
+        s2 = const Registers::OFFSET_S2,
+        s3 = const Registers::OFFSET_S3,
+        s4 = const Registers::OFFSET_S4,
+        s5 = const Registers::OFFSET_S5,
+        s6 = const Registers::OFFSET_S6,
+        s7 = const Registers::OFFSET_S7,
+        s8 = const Registers::OFFSET_S8,
+        s9 = const Registers::OFFSET_S9,
+        s10 = const Registers::OFFSET_S10,
+        s11 = const Registers::OFFSET_S11,
         sstatus = const TrapContext::OFFSET_SSTATUS,
         sepc = const TrapContext::OFFSET_SEPC,
         scause = const TrapContext::OFFSET_SCAUSE,
@@ -146,6 +170,18 @@ unsafe extern "C" fn _raw_trap_return(ctx: &mut TrapContext) -> ! {
         "ld t3, {t3}(a0)",
         "ld t4, {sepc}(a0)",    // Load sepc from TrapContext
         "ld t5, {sstatus}(a0)", // Load sstatus from TrapContext
+        "ld s0, {s0}(a0)",
+        "ld s1, {s1}(a0)",
+        "ld s2, {s2}(a0)",
+        "ld s3, {s3}(a0)",
+        "ld s4, {s4}(a0)",
+        "ld s5, {s5}(a0)",
+        "ld s6, {s6}(a0)",
+        "ld s7, {s7}(a0)",
+        "ld s8, {s8}(a0)",
+        "ld s9, {s9}(a0)",
+        "ld s10, {s10}(a0)",
+        "ld s11, {s11}(a0)",
         "csrw sepc, t4",        // Restore sepc
         "csrw sstatus, t5",     // Restore sstatus
         "ld t4, {t4}(a0)",
@@ -172,6 +208,18 @@ unsafe extern "C" fn _raw_trap_return(ctx: &mut TrapContext) -> ! {
         t4 = const Registers::OFFSET_T4,
         t5 = const Registers::OFFSET_T5,
         t6 = const Registers::OFFSET_T6,
+        s0 = const Registers::OFFSET_S0,
+        s1 = const Registers::OFFSET_S1,
+        s2 = const Registers::OFFSET_S2,
+        s3 = const Registers::OFFSET_S3,
+        s4 = const Registers::OFFSET_S4,
+        s5 = const Registers::OFFSET_S5,
+        s6 = const Registers::OFFSET_S6,
+        s7 = const Registers::OFFSET_S7,
+        s8 = const Registers::OFFSET_S8,
+        s9 = const Registers::OFFSET_S9,
+        s10 = const Registers::OFFSET_S10,
+        s11 = const Registers::OFFSET_S11,
         sstatus = const TrapContext::OFFSET_SSTATUS,
         sepc = const TrapContext::OFFSET_SEPC,
     );
@@ -201,10 +249,10 @@ unsafe extern "C" fn default_trap_handler() {
 #[unsafe(naked)]
 unsafe extern "C" fn captured_trap_handler() {
     naked_asm!(
-        "ld   a0, {captured_context_offset}(t0)",
+        "la   a0, {dirty_task_context}",
         "addi a1, t0, {capturer_context_offset}",
         "j {switch}",
-        captured_context_offset = const offset_of!(TrapScratch, captured_context),
+        dirty_task_context = sym DIRTY_TASK_CONTEXT,
         capturer_context_offset = const offset_of!(TrapScratch, capturer_context),
         switch = sym TaskContext::switch,
     );
@@ -236,28 +284,21 @@ impl TrapScratch {
 impl TrapReturn for TrapContext {
     type TaskContext = TaskContext;
 
-    unsafe fn trap_return(&mut self, to_ctx: &mut Self::TaskContext) {
+    unsafe fn trap_return(&mut self) {
         let irq_states = disable_irqs_save();
-        let old_handler = {
-            let trap_scratch = TRAP_SCRATCH.as_mut();
-            trap_scratch.captured_context = Some(NonNull::from(&mut *to_ctx));
-            core::mem::replace(&mut trap_scratch.handler, captured_trap_handler)
-        };
+        let old_handler =
+            core::mem::replace(&mut TRAP_SCRATCH.as_mut().handler, captured_trap_handler);
 
+        let mut to_ctx = TaskContext::new();
         to_ctx.set_program_counter(captured_trap_return as usize);
         to_ctx.set_stack_pointer(&raw mut *self as usize);
         to_ctx.set_interrupt_enabled(false);
 
         unsafe {
-            TaskContext::switch(&mut TRAP_SCRATCH.as_mut().capturer_context, to_ctx);
-        }
-
-        {
-            let trap_scratch = TRAP_SCRATCH.as_mut();
-            trap_scratch.handler = old_handler;
-            trap_scratch.captured_context = None;
+            TaskContext::switch(&mut TRAP_SCRATCH.as_mut().capturer_context, &mut to_ctx);
         }
 
+        TRAP_SCRATCH.as_mut().handler = old_handler;
         irq_states.restore();
     }
 }

+ 27 - 3
crates/eonix_hal/src/arch/riscv64/trap/trap_context.rs

@@ -36,6 +36,18 @@ pub struct Registers {
     t4: u64,
     t5: u64,
     t6: u64,
+    s0: u64,
+    s1: u64,
+    s2: u64,
+    s3: u64,
+    s4: u64,
+    s5: u64,
+    s6: u64,
+    s7: u64,
+    s8: u64,
+    s9: u64,
+    s10: u64,
+    s11: u64,
 }
 
 /// Saved CPU context when a trap (interrupt or exception) occurs on RISC-V 64.
@@ -69,12 +81,24 @@ impl Registers {
     pub const OFFSET_T4: usize = 16 * 8;
     pub const OFFSET_T5: usize = 17 * 8;
     pub const OFFSET_T6: usize = 18 * 8;
+    pub const OFFSET_S0: usize = 19 * 8;
+    pub const OFFSET_S1: usize = 20 * 8;
+    pub const OFFSET_S2: usize = 21 * 8;
+    pub const OFFSET_S3: usize = 22 * 8;
+    pub const OFFSET_S4: usize = 23 * 8;
+    pub const OFFSET_S5: usize = 24 * 8;
+    pub const OFFSET_S6: usize = 25 * 8;
+    pub const OFFSET_S7: usize = 26 * 8;
+    pub const OFFSET_S8: usize = 27 * 8;
+    pub const OFFSET_S9: usize = 28 * 8;
+    pub const OFFSET_S10: usize = 29 * 8;
+    pub const OFFSET_S11: usize = 30 * 8;
 }
 
 impl TrapContext {
-    pub const OFFSET_SSTATUS: usize = 19 * 8;
-    pub const OFFSET_SEPC: usize = 20 * 8;
-    pub const OFFSET_SCAUSE: usize = 21 * 8;
+    pub const OFFSET_SSTATUS: usize = 31 * 8;
+    pub const OFFSET_SEPC: usize = 32 * 8;
+    pub const OFFSET_SCAUSE: usize = 33 * 8;
 
     fn syscall_no(&self) -> usize {
         self.regs.a7 as usize
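
With the twelve callee-saved registers (s0 through s11) appended after t6, the CSR fields move from slots 19-21 to 31-33, which is exactly what the new OFFSET_* constants encode (one 8-byte slot per register). A small sanity check of that arithmetic, independent of the real Registers layout:

    fn main() {
        let offset_t6 = 18 * 8;              // last temporary, unchanged
        let offset_s0 = offset_t6 + 8;       // s0..s11 follow t6
        let offset_s11 = offset_s0 + 11 * 8;
        let offset_sstatus = offset_s11 + 8; // CSRs come after the GPR block

        assert_eq!(offset_s0, 19 * 8);
        assert_eq!(offset_s11, 30 * 8);
        assert_eq!(offset_sstatus, 31 * 8);  // was 19 * 8 before this change
        println!("offsets consistent");
    }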

+ 24 - 0
crates/posix_types/src/ctypes.rs

@@ -0,0 +1,24 @@
+#[cfg(target_arch = "x86_64")]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct PtrT(u32);
+
+#[cfg(not(target_arch = "x86_64"))]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct PtrT(u64);
+
+impl PtrT {
+    pub fn new(ptr: usize) -> Self {
+        PtrT(
+            ptr.try_into()
+                .expect("Pointer truncated when converting to ptr_t"),
+        )
+    }
+
+    pub fn addr(self) -> usize {
+        self.0 as usize
+    }
+
+    pub fn is_null(self) -> bool {
+        self.0 == 0
+    }
+}
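
The new PtrT models a userspace pointer whose width follows the userland ABI: 32-bit on x86_64 (presumably because that build runs the i386 userland installed by build-img.sh) and 64-bit elsewhere. A small sketch of the intended usage, walking a NULL-terminated pointer table the way get_strings in procops.rs does; the 64-bit variant is reproduced here so the sketch compiles on its own, and the table is a plain slice rather than user memory:

    // Copy of the non-x86_64 variant from ctypes.rs; the real type picks
    // u32 or u64 by target architecture.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    pub struct PtrT(u64);

    impl PtrT {
        pub fn new(ptr: usize) -> Self {
            PtrT(ptr.try_into().expect("Pointer truncated when converting to ptr_t"))
        }
        pub fn addr(self) -> usize {
            self.0 as usize
        }
        pub fn is_null(self) -> bool {
            self.0 == 0
        }
    }

    fn main() {
        // A NULL-terminated, argv-style table of addresses (values arbitrary).
        let table = [PtrT::new(0x1000), PtrT::new(0x2000), PtrT::new(0)];
        let count = table.iter().take_while(|p| !p.is_null()).count();
        assert_eq!(count, 2);
        assert_eq!(table[0].addr(), 0x1000);
    }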

+ 4 - 0
crates/posix_types/src/lib.rs

@@ -1,8 +1,12 @@
 #![no_std]
 
 pub mod constants;
+pub mod ctypes;
 pub mod open;
 pub mod result;
 pub mod signal;
 pub mod stat;
 pub mod syscall_no;
+
+#[cfg(target_arch = "x86_64")]
+pub mod x86_64;

+ 39 - 0
crates/posix_types/src/x86_64.rs

@@ -0,0 +1,39 @@
+#[repr(transparent)]
+#[derive(Debug, Clone, Copy)]
+pub struct UserDescriptorFlags(u32);
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct UserDescriptor {
+    pub entry: u32,
+    pub base: u32,
+    pub limit: u32,
+    pub flags: UserDescriptorFlags,
+}
+
+#[allow(dead_code)]
+impl UserDescriptorFlags {
+    pub fn is_32bit_segment(&self) -> bool {
+        self.0 & 0b1 != 0
+    }
+
+    pub fn contents(&self) -> u32 {
+        self.0 & 0b110
+    }
+
+    pub fn is_read_exec_only(&self) -> bool {
+        self.0 & 0b1000 != 0
+    }
+
+    pub fn is_limit_in_pages(&self) -> bool {
+        self.0 & 0b10000 != 0
+    }
+
+    pub fn is_present(&self) -> bool {
+        self.0 & 0b100000 == 0
+    }
+
+    pub fn is_usable(&self) -> bool {
+        self.0 & 0b1000000 != 0
+    }
+}
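
parse_user_tls above treats a read/exec-only, not-present descriptor as a request to clear the TLS slot. A short sketch decoding such a descriptor with the accessors from this file (flag bit values copied from above; the sample bit pattern is made up):

    #[repr(transparent)]
    #[derive(Debug, Clone, Copy)]
    struct UserDescriptorFlags(u32); // same layout as in x86_64.rs

    impl UserDescriptorFlags {
        fn is_read_exec_only(&self) -> bool { self.0 & 0b1000 != 0 }
        fn is_limit_in_pages(&self) -> bool { self.0 & 0b10000 != 0 }
        fn is_present(&self) -> bool        { self.0 & 0b100000 == 0 }
    }

    fn main() {
        // read_exec_only = 1 and seg_not_present = 1 (bit 5 set means NOT present):
        // the combination parse_user_tls interprets as "clear this TLS entry".
        let flags = UserDescriptorFlags(0b101000);
        assert!(flags.is_read_exec_only());
        assert!(!flags.is_present());
        assert!(!flags.is_limit_in_pages());
        println!("descriptor would be treated as a clear request");
    }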

+ 1 - 0
script/build-img.sh

@@ -32,6 +32,7 @@ if [ "$ARCH" = "x86_64" ]; then
     $SUDO cp ./user-programs/busybox build/mnt/busybox
     $SUDO cp ./user-programs/busybox-minimal build/mnt/busybox_
     $SUDO cp ./user-programs/ld-musl-i386.so.1 build/mnt/ld-musl-i386.so.1
+    $SUDO cp ./user-programs/pthread_test build/mnt/pthread_test
 elif [ "$ARCH" = "riscv64" ]; then
     $SUDO cp ./user-programs/busybox.static build/mnt/busybox
 fi

+ 1 - 0
src/kernel/constants.rs

@@ -25,6 +25,7 @@ pub const ENXIO: u32 = 6;
 pub const ENOEXEC: u32 = 8;
 pub const EBADF: u32 = 9;
 pub const ECHILD: u32 = 10;
+pub const EAGAIN: u32 = 11;
 pub const ENOMEM: u32 = 12;
 pub const EACCES: u32 = 13;
 pub const EFAULT: u32 = 14;

+ 3 - 1
src/kernel/mem/mm_area.rs

@@ -73,7 +73,9 @@ impl MMArea {
                     },
                 };
 
-                self.range.get_mut().shrink(diff);
+                let new_range = self.range_borrow().shrink(self.range_borrow().end() - at);
+
+                *self.range.get_mut() = new_range;
                 (Some(self), Some(right))
             }
         }

+ 54 - 53
src/kernel/mem/mm_list.rs

@@ -184,76 +184,67 @@ impl MMListInner<'_> {
             return Err(EINVAL);
         }
 
-        let mut left_area = None;
-        let mut right_area = None;
-        let mut mid_area = None;
-
-        self.areas.retain(|area| {
+        let mut found = false;
+        let old_areas = core::mem::take(&mut self.areas);
+        for mut area in old_areas {
             let Some((left, mid, right)) = area.range().mask_with_checked(&range_to_protect) else {
-                return true;
+                self.areas.insert(area);
+                continue;
             };
 
-            for pte in self.page_table.iter_user(mid) {
-                let mut page_attr = pte.get_attr().as_page_attr().expect("Not a page attribute");
+            found = true;
 
-                page_attr.set(PageAttribute::READ, permission.read);
-                page_attr.set(PageAttribute::WRITE, permission.write);
-                page_attr.set(PageAttribute::EXECUTE, permission.execute);
+            if let Some(left) = left {
+                let (Some(left), Some(right)) = area.split(left.end()) else {
+                    unreachable!("`left.end()` is within the area");
+                };
 
-                pte.set_attr(page_attr.into());
+                self.areas.insert(left);
+                area = right;
             }
 
-            match (left, right) {
-                (None, None) => {}
-                (Some(left), None) => {
-                    assert!(left_area.is_none());
-                    let (Some(left), Some(right)) = area.clone().split(left.end()) else {
-                        unreachable!("`left.end()` is within the area");
-                    };
+            if let Some(right) = right {
+                let (Some(left), Some(right)) = area.split(right.start()) else {
+                    unreachable!("`right.start()` is within the area");
+                };
 
-                    left_area = Some(left);
-                    mid_area = Some(right);
-                }
-                (None, Some(right)) => {
-                    assert!(right_area.is_none());
-                    let (Some(left), Some(right)) = area.clone().split(right.start()) else {
-                        unreachable!("`right.start()` is within the area");
-                    };
+                self.areas.insert(right);
+                area = left;
+            }
 
-                    mid_area = Some(left);
-                    right_area = Some(right);
+            for pte in self.page_table.iter_user(mid) {
+                let mut page_attr = pte.get_attr().as_page_attr().expect("Not a page attribute");
+
+                if !permission.read && !permission.write && !permission.execute {
+                    // If no permissions are requested, mark the page
+                    // non-present and clear its access bits.
+                    page_attr.remove(
+                        PageAttribute::PRESENT
+                            | PageAttribute::READ
+                            | PageAttribute::WRITE
+                            | PageAttribute::EXECUTE,
+                    );
+
+                    pte.set_attr(page_attr.into());
+                    continue;
                 }
-                (Some(left), Some(right)) => {
-                    assert!(left_area.is_none());
-                    assert!(right_area.is_none());
-                    let (Some(left), Some(mid)) = area.clone().split(left.end()) else {
-                        unreachable!("`left.end()` is within the area");
-                    };
 
-                    let (Some(mid), Some(right)) = mid.split(right.start()) else {
-                        unreachable!("`right.start()` is within the area");
-                    };
+                page_attr.set(PageAttribute::READ, permission.read);
 
-                    left_area = Some(left);
-                    right_area = Some(right);
-                    mid_area = Some(mid);
+                if !page_attr.contains(PageAttribute::COPY_ON_WRITE) {
+                    page_attr.set(PageAttribute::WRITE, permission.write);
                 }
-            }
 
-            false
-        });
+                page_attr.set(PageAttribute::EXECUTE, permission.execute);
 
-        assert!(mid_area.is_some());
+                pte.set_attr(page_attr.into());
+            }
 
-        if let Some(mut mid) = mid_area {
-            mid.permission = permission;
-            self.areas.insert(mid);
-        }
-        if let Some(front) = left_area {
-            self.areas.insert(front);
+            area.permission = permission;
+            self.areas.insert(area);
         }
-        if let Some(back) = right_area {
-            self.areas.insert(back);
+
+        if !found {
+            return Err(ENOMEM);
         }
 
         Ok(())
@@ -285,6 +276,12 @@ impl MMListInner<'_> {
     }
 }
 
+impl Drop for MMListInner<'_> {
+    fn drop(&mut self) {
+        // TODO: Recycle all pages in the page table.
+    }
+}
+
 impl MMList {
     async fn flush_user_tlbs(&self) {
         match self.user_count.load(Ordering::Relaxed) {
@@ -358,6 +355,10 @@ impl MMList {
         list
     }
 
+    pub async fn new_shared(&self) -> Self {
+        todo!()
+    }
+
     pub fn activate(&self) {
         self.user_count.fetch_add(1, Ordering::Acquire);
 

+ 8 - 0
src/kernel/mem/mm_list/page_fault.rs

@@ -67,6 +67,14 @@ impl MMList {
         let area = inner.areas.get(&VRange::from(addr)).ok_or(Signal::SIGBUS)?;
 
         // Check user access permission.
+        if error.contains(PageFaultErrorCode::Read) && !area.permission.read {
+            // Under x86_64, we don't have a way to distinguish
+            // between a read fault and a non-present fault. But it should be OK
+            // since non-readable pages are not allowed under x86 and if we read
+            // both the two false.
+            Err(Signal::SIGSEGV)?
+        }
+
         if error.contains(PageFaultErrorCode::Write) && !area.permission.write {
             Err(Signal::SIGSEGV)?
         }

+ 5 - 0
src/kernel/syscall/file_rw.rs

@@ -108,6 +108,11 @@ fn dup2(old_fd: FD, new_fd: FD) -> KResult<FD> {
     thread.files.dup_to(old_fd, new_fd, OpenFlags::empty())
 }
 
+#[eonix_macros::define_syscall(SYS_DUP3)]
+fn dup3(old_fd: FD, new_fd: FD, flags: OpenFlags) -> KResult<FD> {
+    thread.files.dup_to(old_fd, new_fd, flags)
+}
+
 #[eonix_macros::define_syscall(SYS_PIPE2)]
 fn pipe2(pipe_fd: *mut [FD; 2], flags: OpenFlags) -> KResult<()> {
     let mut buffer = UserBuffer::new(pipe_fd as *mut u8, core::mem::size_of::<[FD; 2]>())?;
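
The new dup3 handler reuses dup_to but forwards the caller-supplied flags; the practical difference from dup2 is that dup3 can set close-on-exec atomically on the new descriptor. A hedged userspace illustration using the libc crate (not part of this repository):

    // Userspace illustration only; requires the `libc` crate.
    fn main() {
        unsafe {
            // Duplicate stdin onto fd 10 with close-on-exec set atomically.
            let fd = libc::dup3(0, 10, libc::O_CLOEXEC);
            assert!(fd == 10 || fd == -1); // -1 if the kernel lacks dup3
            if fd == 10 {
                libc::close(fd);
            }
        }
    }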

+ 28 - 17
src/kernel/syscall/mm.rs

@@ -59,26 +59,32 @@ fn do_mmap2(
 
     let mm_list = &thread.process.mm_list;
 
-    // PROT_NONE, we do unmapping.
-    if prot.is_empty() {
-        Task::block_on(mm_list.unmap(addr, len)).map(|_| 0)?;
-        return Ok(0);
-    }
-    // Otherwise, do mmapping.
-
     // TODO!!!: If we are doing mmap's in 32-bit mode, we should check whether
     //          `addr` is above user reachable memory.
     let addr = if flags.contains(UserMmapFlags::MAP_FIXED) {
-        mm_list.mmap_fixed(
-            addr,
-            len,
-            Mapping::Anonymous,
-            Permission {
-                read: prot.contains(UserMmapProtocol::PROT_READ),
-                write: prot.contains(UserMmapProtocol::PROT_WRITE),
-                execute: prot.contains(UserMmapProtocol::PROT_EXEC),
-            },
-        )
+        if prot.is_empty() {
+            Task::block_on(mm_list.protect(
+                addr,
+                len,
+                Permission {
+                    read: prot.contains(UserMmapProtocol::PROT_READ),
+                    write: prot.contains(UserMmapProtocol::PROT_WRITE),
+                    execute: prot.contains(UserMmapProtocol::PROT_EXEC),
+                },
+            ))
+            .map(|_| addr)
+        } else {
+            mm_list.mmap_fixed(
+                addr,
+                len,
+                Mapping::Anonymous,
+                Permission {
+                    read: prot.contains(UserMmapProtocol::PROT_READ),
+                    write: prot.contains(UserMmapProtocol::PROT_WRITE),
+                    execute: prot.contains(UserMmapProtocol::PROT_EXEC),
+                },
+            )
+        }
     } else {
         mm_list.mmap_hint(
             addr,
@@ -163,4 +169,9 @@ fn mprotect(addr: usize, len: usize, prot: UserMmapProtocol) -> KResult<()> {
     ))
 }
 
+#[eonix_macros::define_syscall(SYS_MEMBARRIER)]
+fn membarrier(_cmd: usize, _flags: usize) -> KResult<()> {
+    Ok(())
+}
+
 pub fn keep_alive() {}
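
With the change above, an mmap of MAP_FIXED with empty protection over an already mapped range is handled as a permission change instead of an unmap, which is the pattern allocators and thread libraries use to poke inaccessible guard pages into a larger reservation. A hedged userspace sketch of that pattern using the libc crate:

    // Userspace illustration only; requires the `libc` crate.
    use core::ptr;

    fn main() {
        unsafe {
            let len = 4 * 4096;
            // Reserve four pages, readable and writable.
            let base = libc::mmap(
                ptr::null_mut(),
                len,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            );
            assert_ne!(base, libc::MAP_FAILED);

            // Revoke access to the last page in place (a guard page), keeping
            // the address range reserved. With the hunk above, the kernel
            // services this as a protection change on the existing mapping.
            let guard = (base as usize + len - 4096) as *mut libc::c_void;
            let r = libc::mmap(
                guard,
                4096,
                libc::PROT_NONE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED,
                -1,
                0,
            );
            assert_ne!(r, libc::MAP_FAILED);
            libc::munmap(base, len);
        }
    }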

+ 110 - 42
src/kernel/syscall/procops.rs

@@ -1,5 +1,3 @@
-use core::ptr::NonNull;
-
 use super::sysinfo::TimeVal;
 use super::SyscallNoReturn;
 use crate::io::Buffer;
@@ -9,10 +7,11 @@ use crate::kernel::constants::{
 };
 use crate::kernel::mem::PageBuffer;
 use crate::kernel::task::{
-    new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, ProgramLoader, Signal,
-    SignalAction, SignalMask, Thread, ThreadBuilder, UserDescriptor, WaitObject, WaitType,
+    do_clone, futex_wait, futex_wake, FutexFlags, FutexOp, ProcessList, ProgramLoader, Signal,
+    SignalAction, SignalMask, Thread, WaitObject, WaitType,
 };
-use crate::kernel::user::dataflow::UserString;
+use crate::kernel::task::{parse_futexop, CloneArgs};
+use crate::kernel::user::dataflow::{CheckedUserPointer, UserString};
 use crate::kernel::user::{UserPointer, UserPointerMut};
 use crate::kernel::vfs::{self, dentry::Dentry};
 use crate::path::Path;
@@ -21,12 +20,14 @@ use crate::{kernel::user::dataflow::UserBuffer, prelude::*};
 use alloc::borrow::ToOwned;
 use alloc::ffi::CString;
 use bitflags::bitflags;
+use core::ptr::NonNull;
+use eonix_hal::processor::UserTLS;
 use eonix_hal::traits::trap::RawTrapContext;
 use eonix_mm::address::Addr as _;
-use eonix_runtime::scheduler::Scheduler;
 use eonix_runtime::task::Task;
 use eonix_sync::AsProof as _;
 use posix_types::constants::{P_ALL, P_PID};
+use posix_types::ctypes::PtrT;
 use posix_types::signal::{SigAction, SigInfo};
 use posix_types::syscall_no::*;
 
@@ -113,16 +114,16 @@ fn mount(source: *const u8, target: *const u8, fstype: *const u8, flags: usize)
     )
 }
 
-fn get_strings(mut ptr_strings: UserPointer<'_, u32>) -> KResult<Vec<CString>> {
+fn get_strings(mut ptr_strings: UserPointer<'_, PtrT>) -> KResult<Vec<CString>> {
     let mut strings = Vec::new();
 
     loop {
-        let addr = ptr_strings.read()?;
-        if addr == 0 {
+        let ptr = ptr_strings.read()?;
+        if ptr.is_null() {
             break;
         }
 
-        let user_string = UserString::new(addr as *const u8)?;
+        let user_string = UserString::new(ptr.addr() as *const u8)?;
         strings.push(user_string.as_cstr().to_owned());
         ptr_strings = ptr_strings.offset(1)?;
     }
@@ -131,7 +132,7 @@ fn get_strings(mut ptr_strings: UserPointer<'_, u32>) -> KResult<Vec<CString>> {
 }
 
 #[eonix_macros::define_syscall(SYS_EXECVE)]
-fn execve(exec: *const u8, argv: *const u32, envp: *const u32) -> KResult<()> {
+fn execve(exec: *const u8, argv: *const PtrT, envp: *const PtrT) -> KResult<()> {
     let exec = UserString::new(exec)?;
     let argv = get_strings(UserPointer::new(argv)?)?;
     let envp = get_strings(UserPointer::new(envp)?)?;
@@ -172,7 +173,7 @@ fn execve(exec: *const u8, argv: *const u32, envp: *const u32) -> KResult<()> {
 fn exit(status: u32) -> SyscallNoReturn {
     unsafe {
         let mut procs = Task::block_on(ProcessList::get().write());
-        procs.do_kill_process(&thread.process, WaitType::Exited(status));
+        Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), false));
     }
 
     SyscallNoReturn
@@ -180,7 +181,12 @@ fn exit(status: u32) -> SyscallNoReturn {
 
 #[eonix_macros::define_syscall(SYS_EXIT_GROUP)]
 fn exit_group(status: u32) -> SyscallNoReturn {
-    sys_exit(thread, status)
+    unsafe {
+        let mut procs = Task::block_on(ProcessList::get().write());
+        Task::block_on(procs.do_exit(&thread, WaitType::Exited(status), true));
+    }
+
+    SyscallNoReturn
 }
 
 enum WaitInfo {
@@ -376,8 +382,7 @@ fn geteuid32() -> KResult<u32> {
 
 #[eonix_macros::define_syscall(SYS_GETEUID)]
 fn geteuid() -> KResult<u32> {
-    // All users are root for now.
-    Ok(0)
+    do_geteuid(thread)
 }
 
 #[eonix_macros::define_syscall(SYS_GETGID)]
@@ -397,14 +402,44 @@ fn gettid() -> KResult<u32> {
     Ok(thread.tid)
 }
 
+pub fn parse_user_tls(arch_tls: usize) -> KResult<UserTLS> {
+    #[cfg(target_arch = "x86_64")]
+    {
+        let desc = arch_tls as *mut posix_types::x86_64::UserDescriptor;
+        let desc_pointer = UserPointerMut::new(desc)?;
+        let mut desc = desc_pointer.read()?;
+
+        // Clear the TLS area if it is not present.
+        if desc.flags.is_read_exec_only() && !desc.flags.is_present() {
+            if desc.limit != 0 && desc.base != 0 {
+                let len = if desc.flags.is_limit_in_pages() {
+                    (desc.limit as usize) << 12
+                } else {
+                    desc.limit as usize
+                };
+
+                CheckedUserPointer::new(desc.base as _, len)?.zero()?;
+            }
+        }
+
+        let (new_tls, entry) =
+            UserTLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages());
+        desc.entry = entry;
+        desc_pointer.write(desc)?;
+
+        Ok(new_tls)
+    }
+
+    #[cfg(target_arch = "riscv64")]
+    {
+        Ok(UserTLS::new(arch_tls as u64))
+    }
+}
+
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_SET_THREAD_AREA)]
-fn set_thread_area(desc: *mut UserDescriptor) -> KResult<()> {
-    let desc_pointer = UserPointerMut::new(desc)?;
-    let mut desc = desc_pointer.read()?;
-
-    thread.set_thread_area(&mut desc)?;
-    desc_pointer.write(desc)?;
+fn set_thread_area(arch_tls: usize) -> KResult<()> {
+    thread.set_user_tls(parse_user_tls(arch_tls)?)?;
 
     // SAFETY: Preemption is disabled on calling `load_thread_area32()`.
     unsafe {
@@ -417,9 +452,8 @@ fn set_thread_area(desc: *mut UserDescriptor) -> KResult<()> {
 }
 
 #[eonix_macros::define_syscall(SYS_SET_TID_ADDRESS)]
-fn set_tid_address(tidptr: *mut u32) -> KResult<u32> {
-    // TODO!!!: Implement this. We don't use it for now.
-    let _tidptr = UserPointerMut::new(tidptr)?;
+fn set_tid_address(tidptr: usize) -> KResult<u32> {
+    thread.clear_child_tid(Some(tidptr));
     Ok(thread.tid)
 }
 
@@ -639,31 +673,65 @@ fn chmod(pathname: *const u8, mode: u32) -> KResult<()> {
 
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_VFORK)]
-fn vfork() -> u32 {
-    sys_fork(thread)
+fn vfork() -> KResult<u32> {
+    let clone_args = CloneArgs::for_vfork();
+
+    do_clone(thread, clone_args)
 }
 
 #[cfg(target_arch = "x86_64")]
 #[eonix_macros::define_syscall(SYS_FORK)]
-fn fork() -> u32 {
-    let mut procs = Task::block_on(ProcessList::get().write());
+fn fork() -> KResult<u32> {
+    let clone_args = CloneArgs::for_fork();
+
+    do_clone(thread, clone_args)
+}
+
+#[eonix_macros::define_syscall(SYS_CLONE)]
+fn clone(
+    clone_flags: usize,
+    new_sp: usize,
+    parent_tidptr: usize,
+    tls: usize,
+    child_tidptr: usize,
+) -> KResult<u32> {
+    let clone_args = CloneArgs::for_clone(clone_flags, new_sp, child_tidptr, parent_tidptr, tls)?;
 
-    let current_process = thread.process.clone();
-    let current_pgroup = current_process.pgroup(procs.prove()).clone();
-    let current_session = current_process.session(procs.prove()).clone();
+    do_clone(thread, clone_args)
+}
 
-    let thread_builder = ThreadBuilder::new().fork_from(&thread);
-    let (new_thread, new_process) = ProcessBuilder::new()
-        .mm_list(Task::block_on(current_process.mm_list.new_cloned()))
-        .parent(current_process)
-        .pgroup(current_pgroup)
-        .session(current_session)
-        .thread_builder(thread_builder)
-        .build(&mut procs);
+#[eonix_macros::define_syscall(SYS_FUTEX)]
+fn futex(
+    uaddr: usize,
+    op: u32,
+    val: u32,
+    _time_out: usize,
+    _uaddr2: usize,
+    _val3: u32,
+) -> KResult<usize> {
+    let (futex_op, futex_flag) = parse_futexop(op)?;
 
-    Scheduler::get().spawn::<KernelStack, _>(new_thread_runnable(new_thread));
+    let pid = if futex_flag.contains(FutexFlags::FUTEX_PRIVATE) {
+        Some(thread.process.pid)
+    } else {
+        None
+    };
 
-    new_process.pid
+    match futex_op {
+        FutexOp::FUTEX_WAIT => {
+            Task::block_on(futex_wait(uaddr, pid, val as u32, None))?;
+            return Ok(0);
+        }
+        FutexOp::FUTEX_WAKE => {
+            return Task::block_on(futex_wake(uaddr, pid, val as u32));
+        }
+        FutexOp::FUTEX_REQUEUE => {
+            todo!()
+        }
+        _ => {
+            todo!()
+        }
+    }
 }
 
 #[cfg(target_arch = "x86_64")]
@@ -680,7 +748,7 @@ fn sigreturn() -> KResult<SyscallNoReturn> {
                 "`sigreturn` failed in thread {} with error {err}!",
                 thread.tid
             );
-            Task::block_on(thread.process.force_kill(Signal::SIGSEGV));
+            Task::block_on(thread.force_kill(Signal::SIGSEGV));
         })?;
 
     Ok(SyscallNoReturn)
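
The new futex handler covers the WAIT/WAKE pair that pthread mutexes and pthread_join are built on (the pthread_test binary added to the x86_64 image presumably exercises this path). A hedged userspace sketch of the raw wait/wake protocol via the libc crate; real code would loop on spurious wakeups and handle EAGAIN:

    // Userspace illustration only; requires the `libc` crate.
    use std::sync::atomic::{AtomicU32, Ordering};

    static WORD: AtomicU32 = AtomicU32::new(0);

    unsafe fn futex(op: libc::c_int, val: u32) -> libc::c_long {
        libc::syscall(
            libc::SYS_futex,
            WORD.as_ptr(),
            op,
            val,
            core::ptr::null::<libc::timespec>(),
        )
    }

    fn main() {
        std::thread::scope(|s| {
            s.spawn(|| unsafe {
                // Blocks while the word still holds 0; EAGAIN if it changed already.
                futex(libc::FUTEX_WAIT | libc::FUTEX_PRIVATE_FLAG, 0);
            });
            std::thread::sleep(std::time::Duration::from_millis(10));
            WORD.store(1, Ordering::SeqCst);
            unsafe {
                // Wake at most one waiter blocked on the same word.
                futex(libc::FUTEX_WAKE | libc::FUTEX_PRIVATE_FLAG, 1);
            }
        });
    }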

+ 6 - 2
src/kernel/task.rs

@@ -1,3 +1,5 @@
+mod clone;
+mod futex;
 mod kernel_stack;
 mod loader;
 mod process;
@@ -7,11 +9,13 @@ mod session;
 mod signal;
 mod thread;
 
+pub use clone::{do_clone, CloneArgs, CloneFlags};
+pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp};
 pub use kernel_stack::KernelStack;
 pub use loader::ProgramLoader;
-pub use process::{Process, ProcessBuilder, WaitObject, WaitType};
+pub use process::{alloc_pid, Process, ProcessBuilder, WaitObject, WaitType};
 pub use process_group::ProcessGroup;
 pub use process_list::ProcessList;
 pub use session::Session;
 pub use signal::{Signal, SignalAction, SignalMask};
-pub use thread::{new_thread_runnable, Thread, ThreadBuilder, UserDescriptor};
+pub use thread::{new_thread_runnable, Thread, ThreadBuilder};

+ 173 - 0
src/kernel/task/clone.rs

@@ -0,0 +1,173 @@
+use crate::{
+    kernel::{
+        syscall::procops::parse_user_tls,
+        task::{
+            alloc_pid, new_thread_runnable, KernelStack, ProcessBuilder, ProcessList, Thread,
+            ThreadBuilder,
+        },
+        user::{dataflow::CheckedUserPointer, UserPointerMut},
+    },
+    KResult,
+};
+use bitflags::bitflags;
+use core::num::NonZero;
+use eonix_hal::processor::UserTLS;
+use eonix_runtime::{scheduler::Scheduler, task::Task};
+use eonix_sync::AsProof;
+
+use crate::kernel::task::Signal;
+
+bitflags! {
+    #[derive(Debug, Default)]
+    pub struct CloneFlags: usize {
+        const CLONE_VM      = 0x00000100;       /* Set if VM shared between processes.  */
+        const CLONE_FS      = 0x00000200;       /* Set if fs info shared between processes.  */
+        const CLONE_FILES   = 0x00000400;       /* Set if open files shared between processes.  */
+        const CLONE_SIGHAND = 0x00000800;       /* Set if signal handlers shared.  */
+        const CLONE_PIDFD   = 0x00001000;       /* Set if a pidfd should be placed in parent.  */
+        const CLONE_PTRACE  = 0x00002000;       /* Set if tracing continues on the child.  */
+        const CLONE_VFORK   = 0x00004000;       /* Set if the parent wants the child to wake it up on mm_release.  */
+        const CLONE_PARENT  = 0x00008000;       /* Set if we want to have the same parent as the cloner.  */
+        const CLONE_THREAD  = 0x00010000;       /* Set to add to same thread group.  */
+        const CLONE_NEWNS   = 0x00020000;       /* Set to create new namespace.  */
+        const CLONE_SYSVSEM = 0x00040000;       /* Set to shared SVID SEM_UNDO semantics.  */
+        const CLONE_SETTLS  = 0x00080000;       /* Set TLS info.  */
+        const CLONE_PARENT_SETTID = 0x00100000; /* Store TID in userlevel buffer before MM copy.  */
+        const CLONE_CHILD_CLEARTID = 0x00200000;/* Register exit futex and memory location to clear.  */
+        const CLONE_DETACHED = 0x00400000;      /* Create clone detached.  */
+        const CLONE_UNTRACED = 0x00800000;      /* Set if the tracing process can't force CLONE_PTRACE on this clone.  */
+        const CLONE_CHILD_SETTID = 0x01000000;  /* Store TID in userlevel buffer in the child.  */
+        const CLONE_NEWCGROUP   = 0x02000000;	/* New cgroup namespace.  */
+        const CLONE_NEWUTS	= 0x04000000;	    /* New utsname group.  */
+        const CLONE_NEWIPC	= 0x08000000;	    /* New ipcs.  */
+        const CLONE_NEWUSER	= 0x10000000;	    /* New user namespace.  */
+        const CLONE_NEWPID	= 0x20000000;	    /* New pid namespace.  */
+        const CLONE_NEWNET	= 0x40000000;	    /* New network namespace.  */
+        const CLONE_IO	= 0x80000000;	        /* Clone I/O context.  */
+    }
+}
+
+#[derive(Debug)]
+pub struct CloneArgs {
+    pub flags: CloneFlags,
+    pub sp: Option<NonZero<usize>>, // Stack pointer for the new thread.
+    pub exit_signal: Option<Signal>, // Signal to send to the parent on exit.
+    pub set_tid_ptr: Option<usize>, // Pointer to set child TID in user space.
+    pub clear_tid_ptr: Option<usize>, // Pointer to clear child TID in user space.
+    pub parent_tid_ptr: Option<usize>, // Pointer to parent TID in user space.
+    pub tls: Option<UserTLS>,       // Pointer to TLS information.
+}
+
+impl CloneArgs {
+    const MASK: usize = 0xff;
+
+    pub fn for_clone(
+        flags: usize,
+        sp: usize,
+        child_tid_ptr: usize,
+        parent_tid_ptr: usize,
+        tls: usize,
+    ) -> KResult<Self> {
+        let clone_flags = CloneFlags::from_bits_truncate(flags & !Self::MASK);
+        let exit_signal = flags & Self::MASK;
+        let exit_signal = if exit_signal != 0 {
+            Some(Signal::try_from(exit_signal as u32)?)
+        } else {
+            None
+        };
+
+        let mut set_tid_ptr = None;
+        if clone_flags.contains(CloneFlags::CLONE_CHILD_SETTID) {
+            set_tid_ptr = Some(child_tid_ptr);
+        }
+
+        let mut clear_tid_ptr = None;
+        if clone_flags.contains(CloneFlags::CLONE_CHILD_CLEARTID) {
+            clear_tid_ptr = Some(child_tid_ptr);
+        }
+
+        let parent_tid_ptr = clone_flags
+            .contains(CloneFlags::CLONE_PARENT_SETTID)
+            .then_some(parent_tid_ptr);
+
+        let tls = if clone_flags.contains(CloneFlags::CLONE_SETTLS) {
+            Some(parse_user_tls(tls)?)
+        } else {
+            None
+        };
+
+        let clone_args = CloneArgs {
+            flags: clone_flags,
+            sp: NonZero::new(sp),
+            set_tid_ptr,
+            clear_tid_ptr,
+            parent_tid_ptr,
+            exit_signal,
+            tls,
+        };
+
+        Ok(clone_args)
+    }
+
+    pub fn for_fork() -> Self {
+        CloneArgs {
+            flags: CloneFlags::empty(),
+            sp: None,
+            set_tid_ptr: None,
+            clear_tid_ptr: None,
+            parent_tid_ptr: None,
+            exit_signal: Some(Signal::SIGCHLD),
+            tls: None,
+        }
+    }
+
+    pub fn for_vfork() -> Self {
+        CloneArgs {
+            flags: CloneFlags::CLONE_VFORK | CloneFlags::CLONE_VM,
+            sp: None,
+            set_tid_ptr: None,
+            clear_tid_ptr: None,
+            parent_tid_ptr: None,
+            exit_signal: Some(Signal::SIGCHLD),
+            tls: None,
+        }
+    }
+}
+
+pub fn do_clone(thread: &Thread, clone_args: CloneArgs) -> KResult<u32> {
+    let mut procs = Task::block_on(ProcessList::get().write());
+
+    let thread_builder = ThreadBuilder::new().clone_from(&thread, &clone_args)?;
+    let current_process = thread.process.clone();
+
+    let new_pid = alloc_pid();
+
+    let new_thread = if clone_args.flags.contains(CloneFlags::CLONE_THREAD) {
+        let new_thread = thread_builder
+            .process(current_process)
+            .tid(new_pid)
+            .tls(clone_args.tls)
+            .build(&mut procs);
+        new_thread
+    } else {
+        let current_pgroup = current_process.pgroup(procs.prove()).clone();
+        let current_session = current_process.session(procs.prove()).clone();
+
+        let (new_thread, new_process) = ProcessBuilder::new()
+            .clone_from(current_process, &clone_args)
+            .pid(new_pid)
+            .pgroup(current_pgroup)
+            .session(current_session)
+            .thread_builder(thread_builder)
+            .build(&mut procs);
+        new_thread
+    };
+
+    if let Some(parent_tid_ptr) = clone_args.parent_tid_ptr {
+        UserPointerMut::new(parent_tid_ptr as *mut u32)?.write(new_pid)?
+    }
+
+    Scheduler::get().spawn::<KernelStack, _>(new_thread_runnable(new_thread));
+
+    Ok(new_pid)
+}
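
CloneArgs::for_clone splits the raw clone argument into CLONE_* flag bits and an exit signal: the low byte (CloneArgs::MASK) carries the signal number, SIGCHLD (17 on Linux) for a plain fork, while everything above it is flags. A small standalone sketch of that split, with constants copied from the table above:

    const CLONE_VM: usize = 0x0000_0100;
    const CLONE_THREAD: usize = 0x0001_0000;
    const CLONE_SETTLS: usize = 0x0008_0000;
    const SIGCHLD: usize = 17;
    const EXIT_SIGNAL_MASK: usize = 0xff; // CloneArgs::MASK

    fn split(raw: usize) -> (usize, Option<usize>) {
        let flags = raw & !EXIT_SIGNAL_MASK;
        let signal = raw & EXIT_SIGNAL_MASK;
        (flags, (signal != 0).then_some(signal))
    }

    fn main() {
        // fork()-style clone: no sharing flags, SIGCHLD delivered to the parent.
        assert_eq!(split(SIGCHLD), (0, Some(SIGCHLD)));

        // pthread_create-style clone: shares the VM, joins the thread group,
        // sets TLS, and delivers no exit signal.
        let raw = CLONE_VM | CLONE_THREAD | CLONE_SETTLS;
        assert_eq!(split(raw), (raw, None));
    }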

+ 258 - 0
src/kernel/task/futex.rs

@@ -0,0 +1,258 @@
+use core::pin::pin;
+
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+use bitflags::bitflags;
+use eonix_sync::{LazyLock, Mutex, MutexGuard, WaitList};
+use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink};
+
+use crate::{
+    kernel::{
+        constants::{EAGAIN, EINVAL},
+        user::UserPointer,
+    },
+    prelude::KResult,
+};
+
+#[derive(PartialEq, Debug, Clone, Copy)]
+#[repr(u32)]
+#[expect(non_camel_case_types)]
+pub enum FutexOp {
+    FUTEX_WAIT = 0,
+    FUTEX_WAKE = 1,
+    FUTEX_FD = 2,
+    FUTEX_REQUEUE = 3,
+    FUTEX_CMP_REQUEUE = 4,
+    FUTEX_WAKE_OP = 5,
+    FUTEX_LOCK_PI = 6,
+    FUTEX_UNLOCK_PI = 7,
+    FUTEX_TRYLOCK_PI = 8,
+    FUTEX_WAIT_BITSET = 9,
+    FUTEX_WAKE_BITSET = 10,
+}
+
+impl TryFrom<u32> for FutexOp {
+    type Error = u32;
+
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        match value {
+            0 => Ok(FutexOp::FUTEX_WAIT),
+            1 => Ok(FutexOp::FUTEX_WAKE),
+            2 => Ok(FutexOp::FUTEX_FD),
+            3 => Ok(FutexOp::FUTEX_REQUEUE),
+            4 => Ok(FutexOp::FUTEX_CMP_REQUEUE),
+            5 => Ok(FutexOp::FUTEX_WAKE_OP),
+            6 => Ok(FutexOp::FUTEX_LOCK_PI),
+            7 => Ok(FutexOp::FUTEX_UNLOCK_PI),
+            8 => Ok(FutexOp::FUTEX_TRYLOCK_PI),
+            9 => Ok(FutexOp::FUTEX_WAIT_BITSET),
+            10 => Ok(FutexOp::FUTEX_WAKE_BITSET),
+            _ => Err(EINVAL),
+        }
+    }
+}
+
+bitflags! {
+    pub struct FutexFlags : u32 {
+        const FUTEX_PRIVATE         = 128;
+        const FUTEX_CLOCK_REALTIME  = 256;
+    }
+}
+
+const FUTEX_OP_MASK: u32 = 0x0000_000F;
+const FUTEX_FLAGS_MASK: u32 = 0xFFFF_FFF0;
+
+pub fn parse_futexop(bits: u32) -> KResult<(FutexOp, FutexFlags)> {
+    let op = FutexOp::try_from(bits & FUTEX_OP_MASK)?;
+
+    let flags = {
+        let flags_bits = bits & FUTEX_FLAGS_MASK;
+        FutexFlags::from_bits(flags_bits).ok_or(EINVAL)
+    }?;
+
+    Ok((op, flags))
+}
+
+struct FutexTable {
+    futex_buckets: Vec<Mutex<FutexBucket>>,
+}
+
+struct FutexBucket {
+    futex_items: RBTree<FutexItemAdapter>,
+}
+
+intrusive_adapter!(
+    FutexItemAdapter = Arc<FutexItem>: FutexItem { link: RBTreeAtomicLink }
+);
+
+impl<'a> KeyAdapter<'a> for FutexItemAdapter {
+    type Key = FutexKey;
+    fn get_key(&self, item: &'a FutexItem) -> Self::Key {
+        item.key
+    }
+}
+
+struct FutexItem {
+    link: RBTreeAtomicLink,
+    key: FutexKey,
+    // A list of waiters that are waiting on this futex.
+    wait_list: WaitList,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+struct FutexKey {
+    addr: usize,
+    pid: Option<u32>,
+}
+
+impl FutexKey {
+    fn new(addr: usize, pid: Option<u32>) -> Self {
+        FutexKey { addr, pid }
+    }
+
+    fn hash(&self) -> usize {
+        (self.addr >> 2) + self.pid.unwrap_or(0) as usize
+    }
+}
+
+static FUTEX_TABLE: LazyLock<FutexTable> = LazyLock::new(FutexTable::new);
+
+const FUTEX_BUCKETS_NUM: usize = 100;
+
+impl FutexTable {
+    fn new() -> Self {
+        let futex_buckets = (0..FUTEX_BUCKETS_NUM)
+            .map(|_| Mutex::new(FutexBucket::new()))
+            .collect();
+        FutexTable { futex_buckets }
+    }
+
+    fn get_bucket(&self, key: &FutexKey) -> (usize, &Mutex<FutexBucket>) {
+        let idx = key.hash() % self.futex_buckets.len();
+        (idx, &self.futex_buckets[idx])
+    }
+}
+
+impl FutexBucket {
+    fn new() -> Self {
+        FutexBucket {
+            futex_items: RBTree::new(FutexItemAdapter::new()),
+        }
+    }
+
+    fn find(&mut self, key: FutexKey) -> Option<Arc<FutexItem>> {
+        let cursor = self.futex_items.find(&key);
+        cursor.clone_pointer()
+    }
+
+    fn find_or_insert(&mut self, key: FutexKey) -> Arc<FutexItem> {
+        if let Some(item) = self.find(key) {
+            return item;
+        }
+
+        let item = Arc::new(FutexItem {
+            link: RBTreeAtomicLink::new(),
+            key,
+            wait_list: WaitList::new(),
+        });
+
+        self.futex_items.insert(item.clone());
+
+        item
+    }
+}
+
+pub async fn futex_wait(
+    uaddr: usize,
+    pid: Option<u32>,
+    expected_val: u32,
+    _timeout: Option<u32>,
+) -> KResult<()> {
+    let futex_key = FutexKey::new(uaddr, pid);
+
+    let futex_item = {
+        let (_, futex_bucket_ref) = FUTEX_TABLE.get_bucket(&futex_key);
+        let mut futex_bucket = futex_bucket_ref.lock().await;
+
+        let val = UserPointer::new(uaddr as *const u32)?.read()?;
+
+        if val != expected_val {
+            return Err(EAGAIN);
+        }
+
+        futex_bucket.find_or_insert(futex_key)
+    };
+
+    let mut wait = pin!(futex_item.wait_list.prepare_to_wait());
+    wait.as_mut().add_to_wait_list();
+    wait.await;
+    Ok(())
+}
+
+pub async fn futex_wake(uaddr: usize, pid: Option<u32>, max_wake_count: u32) -> KResult<usize> {
+    let futex_key = FutexKey::new(uaddr, pid);
+
+    let (_, futex_bucket_ref) = FUTEX_TABLE.get_bucket(&futex_key);
+    let mut futex_bucket = futex_bucket_ref.lock().await;
+
+    if let Some(futex_item) = futex_bucket.find(futex_key) {
+        let mut count = 0;
+        loop {
+            let not_empty = futex_item.wait_list.notify_one();
+            if !not_empty || count == max_wake_count {
+                break;
+            }
+            count += 1;
+        }
+        return Ok(count as usize);
+    }
+
+    Ok(0)
+}
+
+// The caller must ensure the two keys hash to different buckets;
+// the buckets are then locked in index order to avoid deadlocks.
+async fn double_lock_bucket(
+    futex_key0: FutexKey,
+    futex_key1: FutexKey,
+) -> (
+    MutexGuard<'static, FutexBucket>,
+    MutexGuard<'static, FutexBucket>,
+) {
+    let (bucket_idx0, bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key0);
+    let (bucket_idx1, bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_key1);
+
+    if bucket_idx0 < bucket_idx1 {
+        let bucket0 = bucket_ref0.lock().await;
+        let bucket1 = bucket_ref1.lock().await;
+        (bucket0, bucket1)
+    } else {
+        let bucket1 = bucket_ref1.lock().await;
+        let bucket0 = bucket_ref0.lock().await;
+        (bucket0, bucket1)
+    }
+}
+
+async fn futex_requeue(
+    uaddr: usize,
+    pid: Option<u32>,
+    wake_count: u32,
+    requeue_uaddr: usize,
+    requeue_count: u32,
+) -> KResult<usize> {
+    let futex_key = FutexKey::new(uaddr, pid);
+    let futex_requeue_key = FutexKey::new(requeue_uaddr, pid);
+
+    let (bucket_idx0, bucket_ref0) = FUTEX_TABLE.get_bucket(&futex_key);
+    let (bucket_idx1, bucket_ref1) = FUTEX_TABLE.get_bucket(&futex_requeue_key);
+
+    if bucket_idx0 == bucket_idx1 {
+        // If the keys are the same, we can just wake up the waiters.
+        return futex_wake(uaddr, pid, wake_count).await;
+    }
+
+    let (futex_bucket, futex_requeue_bucket) =
+        double_lock_bucket(futex_key, futex_requeue_key).await;
+
+    todo!()
+}
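
parse_futexop keeps the low four bits as the FutexOp and the rest as flags, and FutexTable::get_bucket reduces FutexKey::hash modulo FUTEX_BUCKETS_NUM to pick a bucket. A minimal sketch of both computations, with values mirroring the constants above:

    const FUTEX_OP_MASK: u32 = 0x0000_000F;
    const FUTEX_FLAGS_MASK: u32 = 0xFFFF_FFF0;
    const FUTEX_WAIT: u32 = 0;
    const FUTEX_PRIVATE: u32 = 128;
    const FUTEX_BUCKETS_NUM: usize = 100;

    fn parse(bits: u32) -> (u32, u32) {
        (bits & FUTEX_OP_MASK, bits & FUTEX_FLAGS_MASK)
    }

    fn bucket_index(addr: usize, pid: Option<u32>) -> usize {
        // FutexKey::hash followed by get_bucket's modulo: word index plus the
        // owning pid for private futexes.
        ((addr >> 2) + pid.unwrap_or(0) as usize) % FUTEX_BUCKETS_NUM
    }

    fn main() {
        // FUTEX_WAIT with the private flag set.
        assert_eq!(parse(FUTEX_WAIT | FUTEX_PRIVATE), (FUTEX_WAIT, FUTEX_PRIVATE));

        // Private futexes at the same address in different processes usually
        // land in different buckets because the pid participates in the hash.
        let a = bucket_index(0x7fff_0000, Some(42));
        let b = bucket_index(0x7fff_0000, Some(43));
        assert_ne!(a, b);
        println!("buckets: {a} {b}");
    }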

+ 47 - 19
src/kernel/task/process.rs

@@ -3,6 +3,7 @@ use super::{
     ProcessList, Session, Signal, Thread,
 };
 use crate::kernel::constants::{ECHILD, EINTR, EPERM, ESRCH};
+use crate::kernel::task::{CloneArgs, CloneFlags};
 use crate::{
     kernel::mem::MMList,
     prelude::*,
@@ -25,10 +26,12 @@ use posix_types::constants::{CLD_CONTINUED, CLD_DUMPED, CLD_EXITED, CLD_KILLED,
 
 pub struct ProcessBuilder {
     mm_list: Option<MMList>,
+    exit_signal: Option<Signal>,
     parent: Option<Arc<Process>>,
     thread_builder: Option<ThreadBuilder>,
     pgroup: Option<Arc<ProcessGroup>>,
     session: Option<Arc<Session>>,
+    pid: Option<u32>,
 }
 
 #[derive(Debug)]
@@ -41,6 +44,8 @@ pub struct Process {
     pub wait_list: WaitList,
     pub mm_list: MMList,
 
+    pub exit_signal: Option<Signal>,
+
     /// Parent process
     ///
     /// `parent` must be valid during the whole life of the process.
@@ -149,7 +154,9 @@ impl WaitObject {
 impl ProcessBuilder {
     pub fn new() -> Self {
         Self {
+            pid: None,
             mm_list: None,
+            exit_signal: None,
             parent: None,
             thread_builder: None,
             pgroup: None,
@@ -157,11 +164,35 @@ impl ProcessBuilder {
         }
     }
 
+    pub fn clone_from(mut self, process: Arc<Process>, clone_args: &CloneArgs) -> Self {
+        let mm_list = if clone_args.flags.contains(CloneFlags::CLONE_VM) {
+            Task::block_on(process.mm_list.new_shared())
+        } else {
+            Task::block_on(process.mm_list.new_cloned())
+        };
+
+        if let Some(exit_signal) = clone_args.exit_signal {
+            self = self.exit_signal(exit_signal)
+        }
+
+        self.mm_list(mm_list).parent(process)
+    }
+
+    pub fn exit_signal(mut self, exit_signal: Signal) -> Self {
+        self.exit_signal = Some(exit_signal);
+        self
+    }
+
     pub fn mm_list(mut self, mm_list: MMList) -> Self {
         self.mm_list = Some(mm_list);
         self
     }
 
+    pub fn pid(mut self, pid: u32) -> Self {
+        self.pid = Some(pid);
+        self
+    }
+
     pub fn parent(mut self, parent: Arc<Process>) -> Self {
         self.parent = Some(parent);
         self
@@ -182,18 +213,14 @@ impl ProcessBuilder {
         self
     }
 
-    fn alloc_pid() -> u32 {
-        static NEXT_PID: AtomicU32 = AtomicU32::new(1);
-        NEXT_PID.fetch_add(1, Ordering::Relaxed)
-    }
-
     pub fn build(self, process_list: &mut ProcessList) -> (Arc<Thread>, Arc<Process>) {
         let mm_list = self.mm_list.unwrap_or_else(|| MMList::new());
 
         let process = Arc::new(Process {
-            pid: Self::alloc_pid(),
+            pid: self.pid.expect("should set pid before building"),
             wait_list: WaitList::new(),
             mm_list,
+            exit_signal: self.exit_signal,
             parent: RCUPointer::empty(),
             pgroup: RCUPointer::empty(),
             session: RCUPointer::empty(),
@@ -299,7 +326,7 @@ impl Process {
                     return Ok(None);
                 }
 
-                waits = waits.wait().await?;
+                waits = waits.wait(no_block).await?;
             }
         };
 
@@ -460,9 +487,13 @@ impl Process {
         self.parent.load()
     }
 
-    pub fn notify(&self, wait: WaitObject, procs: Proof<'_, ProcessList>) {
+    pub fn notify(&self, signal: Option<Signal>, wait: WaitObject, procs: Proof<'_, ProcessList>) {
         self.wait_list.notify(wait);
-        self.raise(Signal::SIGCHLD, procs);
+
+        if let Some(signal) = signal {
+            // If we have a signal, we raise it to the process.
+            self.raise(signal, procs);
+        }
     }
 
     pub fn notify_batch(&self) -> NotifyBatch<'_, '_, '_> {
@@ -473,14 +504,6 @@ impl Process {
             needs_notify: false,
         }
     }
-
-    pub async fn force_kill(self: &Arc<Self>, signal: Signal) {
-        let mut proc_list = ProcessList::get().write().await;
-        unsafe {
-            // SAFETY: Preemption is disabled.
-            proc_list.do_kill_process(self, WaitType::Signaled(signal));
-        }
-    }
 }
 
 impl WaitList {
@@ -541,14 +564,14 @@ impl Entry<'_, '_, '_> {
         }
     }
 
-    pub fn wait(self) -> impl core::future::Future<Output = KResult<Self>> {
+    pub fn wait(self, no_block: bool) -> impl core::future::Future<Output = KResult<Self>> {
         let wait_procs = self.wait_procs.unlock();
 
         async move {
             let process_list = self.cv.wait(self.process_list).await;
             let wait_procs = wait_procs.relock().await;
 
-            if Thread::current().signal_list.has_pending_signal() {
+            if !no_block && Thread::current().signal_list.has_pending_signal() {
                 Err(EINTR)
             } else {
                 Ok(Self {
@@ -595,3 +618,8 @@ impl Drop for NotifyBatch<'_, '_, '_> {
         }
     }
 }
+
+pub fn alloc_pid() -> u32 {
+    static NEXT_PID: AtomicU32 = AtomicU32::new(1);
+    NEXT_PID.fetch_add(1, Ordering::Relaxed)
+}

+ 79 - 46
src/kernel/task/process_list.rs

@@ -1,7 +1,10 @@
 use core::sync::atomic::Ordering;
 
 use super::{Process, ProcessGroup, Session, Thread, WaitObject, WaitType};
-use crate::rcu::rcu_sync;
+use crate::{
+    kernel::{task::futex_wake, user::UserPointerMut},
+    rcu::rcu_sync,
+};
 use alloc::{
     collections::btree_map::BTreeMap,
     sync::{Arc, Weak},
@@ -105,63 +108,93 @@ impl ProcessList {
     /// This function will destroy the process and all its threads.
     /// It is the caller's responsibility to ensure that the process is not
     /// running or will not run after this function is called.
-    pub unsafe fn do_kill_process(&mut self, process: &Arc<Process>, status: WaitType) {
+    pub async unsafe fn do_exit(
+        &mut self,
+        thread: &Thread,
+        exit_status: WaitType,
+        is_exiting_group: bool,
+    ) {
+        let process = thread.process.clone();
+
         if process.pid == 1 {
             panic!("init exited");
         }
 
         let inner = process.inner.access_mut(self.prove_mut());
-        // TODO!!!!!!: When we are killing multiple threads, we need to wait until all
-        // the threads are stopped then proceed.
-        for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
-            assert!(thread.tid == Thread::current().tid);
+
+        thread.dead.store(true, Ordering::SeqCst);
+
+        if is_exiting_group {
             // TODO: Send SIGKILL to all threads.
-            thread.files.close_all();
-            thread.dead.store(true, Ordering::SeqCst);
+            // todo!()
         }
 
-        // If we are the session leader, we should drop the control terminal.
-        if process.session(self.prove()).sid == process.pid {
-            if let Some(terminal) =
-                Task::block_on(process.session(self.prove()).drop_control_terminal())
-            {
-                terminal.drop_session();
-            }
+        if thread.tid != process.pid {
+            self.threads.remove(&thread.tid);
+            inner.threads.remove(&thread.tid).unwrap();
         }
 
-        // Release the MMList as well as the page table.
-        unsafe {
-            // SAFETY: We are exiting the process, so no one might be using it.
-            process.mm_list.replace(None);
-        }
+        if let Some(clear_ctid) = thread.get_clear_ctid() {
+            UserPointerMut::new(clear_ctid as *mut u32)
+                .unwrap()
+                .write(0u32)
+                .expect("should clear child tid successfully");
 
-        // Make children orphans (adopted by init)
-        {
-            let init = self.init_process();
-            inner.children.retain(|_, child| {
-                let child = child.upgrade().unwrap();
-                // SAFETY: `child.parent` must be ourself. So we don't need to free it.
-                unsafe { child.parent.swap(Some(init.clone())) };
-                init.add_child(&child, self.prove_mut());
-
-                false
-            });
+            futex_wake(clear_ctid, None, 1)
+                .await
+                .expect("should wake up child tid");
         }
 
-        let mut init_notify = self.init_process().notify_batch();
-        process
-            .wait_list
-            .drain_exited()
-            .into_iter()
-            .for_each(|item| init_notify.notify(item));
-        init_notify.finish(self.prove());
-
-        process.parent(self.prove()).notify(
-            WaitObject {
-                pid: process.pid,
-                code: status,
-            },
-            self.prove(),
-        );
+        // main thread exit
+        if thread.tid == process.pid {
+            assert_eq!(thread.tid, process.pid);
+
+            thread.files.close_all();
+
+            // If we are the session leader, we should drop the control terminal.
+            if process.session(self.prove()).sid == process.pid {
+                if let Some(terminal) =
+                    Task::block_on(process.session(self.prove()).drop_control_terminal())
+                {
+                    terminal.drop_session();
+                }
+            }
+
+            // Release the MMList as well as the page table.
+            unsafe {
+                // SAFETY: We are exiting the process, so no one might be using it.
+                process.mm_list.replace(None);
+            }
+
+            // Make children orphans (adopted by init)
+            {
+                let init = self.init_process();
+                inner.children.retain(|_, child| {
+                    let child = child.upgrade().unwrap();
+                    // SAFETY: `child.parent` must be ourself. So we don't need to free it.
+                    unsafe { child.parent.swap(Some(init.clone())) };
+                    init.add_child(&child, self.prove_mut());
+
+                    false
+                });
+            }
+
+            let mut init_notify = self.init_process().notify_batch();
+            process
+                .wait_list
+                .drain_exited()
+                .into_iter()
+                .for_each(|item| init_notify.notify(item));
+            init_notify.finish(self.prove());
+
+            process.parent(self.prove()).notify(
+                process.exit_signal,
+                WaitObject {
+                    pid: process.pid,
+                    code: exit_status,
+                },
+                self.prove(),
+            );
+        }
     }
 }

+ 48 - 4
src/kernel/task/signal.rs

@@ -6,6 +6,7 @@ use super::{ProcessList, Thread, WaitObject, WaitType};
 use crate::kernel::constants::{EFAULT, EINVAL};
 use crate::{kernel::user::UserPointer, prelude::*};
 use alloc::collections::binary_heap::BinaryHeap;
+use alloc::sync::Arc;
 use core::{cmp::Reverse, task::Waker};
 use eonix_hal::fpu::FpuState;
 use eonix_hal::traits::trap::RawTrapContext;
@@ -30,7 +31,7 @@ struct SignalListInner {
     stop_waker: Option<Waker>,
 
     // TODO!!!!!: Signal disposition should be per-process.
-    actions: SignalActionList,
+    actions: Arc<SignalActionList>,
 }
 
 pub struct SignalList {
@@ -105,7 +106,7 @@ impl SignalList {
                 pending: BinaryHeap::new(),
                 signal_waker: None,
                 stop_waker: None,
-                actions: SignalActionList::new(),
+                actions: Arc::new(SignalActionList::new()),
             }),
         }
     }
@@ -214,6 +215,7 @@ impl SignalList {
                     let thread = Thread::current();
                     if let Some(parent) = thread.process.parent.load() {
                         parent.notify(
+                            Some(Signal::SIGCHLD),
                             WaitObject {
                                 pid: thread.process.pid,
                                 code: WaitType::Stopped(signal),
@@ -238,6 +240,7 @@ impl SignalList {
 
                     if let Some(parent) = thread.process.parent.load() {
                         parent.notify(
+                            Some(Signal::SIGCHLD),
                             WaitObject {
                                 pid: thread.process.pid,
                                 code: WaitType::Continued,
@@ -247,8 +250,8 @@ impl SignalList {
                     }
                 }
                 signal => {
-                    // Default to terminate the process.
-                    Thread::current().process.force_kill(signal).await;
+                    // Default to terminate the thread.
+                    Thread::current().force_kill(signal).await;
                     return;
                 }
             }
@@ -273,3 +276,44 @@ impl SignalList {
         Ok(())
     }
 }
+
+impl SignalList {
+    pub fn new_cloned(other: &Self) -> Self {
+        let inner = other.inner.lock();
+
+        debug_assert!(
+            inner.stop_waker.is_none(),
+            "We should not have a stop waker here"
+        );
+
+        Self {
+            inner: Spin::new(SignalListInner {
+                mask: inner.mask,
+                pending: BinaryHeap::new(),
+                signal_waker: None,
+                stop_waker: None,
+                actions: SignalActionList::new_cloned(&inner.actions),
+            }),
+        }
+    }
+
+    // Share only the signal action table with `other`; pending signals and wakers start fresh.
+    pub fn new_shared(other: &Self) -> Self {
+        let inner = other.inner.lock();
+
+        debug_assert!(
+            inner.stop_waker.is_none(),
+            "We should not have a stop waker here"
+        );
+
+        Self {
+            inner: Spin::new(SignalListInner {
+                mask: inner.mask,
+                pending: BinaryHeap::new(),
+                signal_waker: None,
+                stop_waker: None,
+                actions: SignalActionList::new_shared(&inner.actions),
+            }),
+        }
+    }
+}
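
The new `new_cloned`/`new_shared` constructors differ only in how they treat the action table: a fork deep-copies it, while `CLONE_SIGHAND` shares the same `Arc`. A rough stand-alone sketch of that distinction, with `SigList` and a string-keyed `Actions` map as simplified stand-ins rather than the kernel's real types:

use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};

// Stand-in for the per-process signal disposition table.
type Actions = Arc<Mutex<BTreeMap<u8, &'static str>>>;

struct SigList {
    mask: u64,
    pending: Vec<u8>,
    actions: Actions,
}

impl SigList {
    // fork(): deep-copy the dispositions; pending signals start empty.
    fn new_cloned(other: &Self) -> Self {
        Self {
            mask: other.mask,
            pending: Vec::new(),
            actions: Arc::new(Mutex::new(other.actions.lock().unwrap().clone())),
        }
    }

    // CLONE_SIGHAND: share the same disposition table; everything else is fresh.
    fn new_shared(other: &Self) -> Self {
        Self {
            mask: other.mask,
            pending: Vec::new(),
            actions: other.actions.clone(),
        }
    }
}

fn main() {
    let parent = SigList { mask: 0, pending: vec![], actions: Arc::default() };
    let forked = SigList::new_cloned(&parent);
    let thread = SigList::new_shared(&parent);
    println!("mask = {}, pending = {}", thread.mask, forked.pending.len());

    parent.actions.lock().unwrap().insert(15, "ignore");
    // The shared list observes the update; the cloned one does not.
    assert!(thread.actions.lock().unwrap().contains_key(&15));
    assert!(!forked.actions.lock().unwrap().contains_key(&15));
}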

+ 23 - 9
src/kernel/task/signal/signal_action.rs

@@ -7,10 +7,11 @@ use crate::{
     },
     SIGNAL_NOW,
 };
-use alloc::collections::btree_map::BTreeMap;
+use alloc::{collections::btree_map::BTreeMap, sync::Arc};
 use core::num::NonZero;
 use eonix_hal::{fpu::FpuState, traits::trap::RawTrapContext, trap::TrapContext};
 use eonix_mm::address::{Addr as _, AddrOps as _, VAddr};
+use eonix_sync::Spin;
 use posix_types::signal::{SigAction, TryFromSigAction};
 
 #[derive(Debug, Clone, Copy)]
@@ -26,38 +27,51 @@ pub enum SignalAction {
 
 #[derive(Debug)]
 pub struct SignalActionList {
-    actions: BTreeMap<Signal, SignalAction>,
+    actions: Spin<BTreeMap<Signal, SignalAction>>,
+}
+
+impl SignalActionList {
+    pub fn new_shared(other: &Arc<Self>) -> Arc<Self> {
+        other.clone()
+    }
+
+    pub fn new_cloned(other: &Self) -> Arc<Self> {
+        Arc::new(Self {
+            actions: Spin::new(other.actions.lock().clone()),
+        })
+    }
 }
 
 impl SignalActionList {
     pub const fn new() -> Self {
         Self {
-            actions: BTreeMap::new(),
+            actions: Spin::new(BTreeMap::new()),
         }
     }
 
-    pub fn set(&mut self, signal: Signal, action: SignalAction) {
+    pub fn set(&self, signal: Signal, action: SignalAction) {
         debug_assert!(
             !matches!(signal, SIGNAL_NOW!()),
             "SIGSTOP and SIGKILL should not be set for a handler."
         );
         match action {
-            SignalAction::Default => self.actions.remove(&signal),
-            _ => self.actions.insert(signal, action),
+            SignalAction::Default => self.actions.lock().remove(&signal),
+            _ => self.actions.lock().insert(signal, action),
         };
     }
 
     pub fn get(&self, signal: Signal) -> SignalAction {
-        match self.actions.get(&signal) {
+        match self.actions.lock().get(&signal) {
             None => SignalAction::Default,
             Some(action) => action.clone(),
         }
     }
 
-    pub fn remove_non_ignore(&mut self) {
+    pub fn remove_non_ignore(&self) {
         // Remove all custom handlers except for the ignore action.
         // Default handlers should never appear in the list so we don't consider that.
         self.actions
+            .lock()
             .retain(|_, action| matches!(action, SignalAction::Ignore));
     }
 }
@@ -113,7 +127,7 @@ impl SignalAction {
 impl Clone for SignalActionList {
     fn clone(&self) -> Self {
         Self {
-            actions: self.actions.clone(),
+            actions: Spin::new(self.actions.lock().clone()),
         }
     }
 }
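
Moving the `BTreeMap` behind a `Spin` lock is what lets `set` and `remove_non_ignore` take `&self`, so the whole table can live behind an `Arc` and be shared by threads created with `CLONE_SIGHAND`. A minimal sketch of the same interior-mutability pattern, with `std::sync::Mutex` standing in for `Spin` and an invented `Disposition` enum:

use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};

// Hypothetical disposition type for the example.
#[derive(Clone, Debug, PartialEq)]
enum Disposition {
    Ignore,
    Handler(usize),
}

struct ActionList {
    // Interior mutability: callers only ever need `&self`.
    actions: Mutex<BTreeMap<u8, Disposition>>,
}

impl ActionList {
    fn new() -> Self {
        Self { actions: Mutex::new(BTreeMap::new()) }
    }

    // `&self` instead of `&mut self`, so an `Arc<ActionList>` can be shared
    // between threads that install handlers concurrently.
    fn set(&self, signal: u8, disp: Disposition) {
        self.actions.lock().unwrap().insert(signal, disp);
    }

    fn get(&self, signal: u8) -> Option<Disposition> {
        self.actions.lock().unwrap().get(&signal).cloned()
    }
}

fn main() {
    let shared = Arc::new(ActionList::new());
    let also_shared = shared.clone(); // `new_shared` is just an Arc clone.

    shared.set(2, Disposition::Handler(0xdead));
    assert_eq!(also_shared.get(2), Some(Disposition::Handler(0xdead)));
    assert_eq!(also_shared.get(9), None);
    let _ = Disposition::Ignore;
}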

+ 86 - 82
src/kernel/task/thread.rs

@@ -1,13 +1,14 @@
 use super::{
     signal::{RaiseResult, Signal, SignalList},
-    Process, ProcessList,
+    Process, ProcessList, WaitType,
 };
 use crate::{
     kernel::{
         interrupt::default_irq_handler,
         syscall::{syscall_handlers, SyscallHandler},
+        task::{clone::CloneArgs, CloneFlags},
         timer::timer_interrupt,
-        user::dataflow::CheckedUserPointer,
+        user::UserPointerMut,
         vfs::{filearray::FileArray, FsContext},
     },
     prelude::*,
@@ -22,7 +23,6 @@ use core::{
     task::{Context, Poll, Waker},
 };
 use eonix_hal::{
-    context::TaskContext,
     fpu::FpuState,
     processor::{UserTLS, CPU},
     traits::{
@@ -54,6 +54,7 @@ pub struct ThreadBuilder {
     signal_list: Option<SignalList>,
     tls: Option<UserTLS>,
     set_child_tid: Option<usize>,
+    clear_child_tid: Option<usize>,
 
     trap_ctx: Option<TrapContext>,
     fpu_state: Option<FpuState>,
@@ -69,7 +70,9 @@ struct ThreadInner {
 
     /// User pointer
     /// Store child thread's tid when child thread returns to user space.
-    set_child_tid: usize,
+    set_child_tid: Option<usize>,
+
+    clear_child_tid: Option<usize>,
 }
 
 pub struct Thread {
@@ -89,46 +92,6 @@ pub struct Thread {
     inner: Spin<ThreadInner>,
 }
 
-#[repr(transparent)]
-#[derive(Debug, Clone, Copy)]
-pub struct UserDescriptorFlags(u32);
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct UserDescriptor {
-    entry: u32,
-    base: u32,
-    limit: u32,
-    flags: UserDescriptorFlags,
-}
-
-#[allow(dead_code)]
-impl UserDescriptorFlags {
-    fn is_32bit_segment(&self) -> bool {
-        self.0 & 0b1 != 0
-    }
-
-    fn contents(&self) -> u32 {
-        self.0 & 0b110
-    }
-
-    fn is_read_exec_only(&self) -> bool {
-        self.0 & 0b1000 != 0
-    }
-
-    fn is_limit_in_pages(&self) -> bool {
-        self.0 & 0b10000 != 0
-    }
-
-    fn is_present(&self) -> bool {
-        self.0 & 0b100000 == 0
-    }
-
-    fn is_usable(&self) -> bool {
-        self.0 & 0b1000000 != 0
-    }
-}
-
 impl ThreadBuilder {
     pub fn new() -> Self {
         Self {
@@ -140,6 +103,7 @@ impl ThreadBuilder {
             signal_list: None,
             tls: None,
             set_child_tid: None,
+            clear_child_tid: None,
             trap_ctx: None,
             fpu_state: None,
         }
@@ -180,8 +144,13 @@ impl ThreadBuilder {
         self
     }
 
-    pub fn set_child_tid(mut self, set_child_tid: usize) -> Self {
-        self.set_child_tid = Some(set_child_tid);
+    pub fn set_child_tid(mut self, set_child_tid: Option<usize>) -> Self {
+        self.set_child_tid = set_child_tid;
+        self
+    }
+
+    pub fn clear_child_tid(mut self, clear_child_tid: Option<usize>) -> Self {
+        self.clear_child_tid = clear_child_tid;
         self
     }
 
@@ -206,23 +175,51 @@ impl ThreadBuilder {
         self
     }
 
-    /// Fork the thread from another thread.
-    ///
-    /// Sets the thread's files, fs_context, signal_list, name, tls, and set_child_tid
-    pub fn fork_from(self, thread: &Thread) -> Self {
+    /// Clone the thread's state from an existing thread according to `clone_args`.
+    pub fn clone_from(self, thread: &Thread, clone_args: &CloneArgs) -> KResult<Self> {
         let inner = thread.inner.lock();
 
         let mut trap_ctx = thread.trap_ctx.borrow().clone();
         trap_ctx.set_user_return_value(0);
 
-        self.files(FileArray::new_cloned(&thread.files))
-            .fs_context(FsContext::new_cloned(&thread.fs_context))
-            .signal_list(thread.signal_list.clone())
+        #[cfg(target_arch = "riscv64")]
+        {
+            let pc = trap_ctx.get_program_counter();
+            trap_ctx.set_program_counter(pc + 4);
+        }
+
+        if let Some(sp) = clone_args.sp {
+            trap_ctx.set_stack_pointer(sp.get());
+        }
+
+        let fs_context = if clone_args.flags.contains(CloneFlags::CLONE_FS) {
+            FsContext::new_shared(&thread.fs_context)
+        } else {
+            FsContext::new_cloned(&thread.fs_context)
+        };
+
+        let files = if clone_args.flags.contains(CloneFlags::CLONE_FILES) {
+            FileArray::new_shared(&thread.files)
+        } else {
+            FileArray::new_cloned(&thread.files)
+        };
+
+        let signal_list = if clone_args.flags.contains(CloneFlags::CLONE_SIGHAND) {
+            SignalList::new_shared(&thread.signal_list)
+        } else {
+            SignalList::new_cloned(&thread.signal_list)
+        };
+
+        Ok(self
+            .files(files)
+            .fs_context(fs_context)
+            .signal_list(signal_list)
             .name(inner.name.clone())
-            .tls(inner.tls.clone())
-            .set_child_tid(inner.set_child_tid)
+            .tls(clone_args.tls.clone())
+            .set_child_tid(clone_args.set_tid_ptr)
+            .clear_child_tid(clone_args.clear_tid_ptr)
             .trap_ctx(trap_ctx)
-            .fpu_state(thread.fpu_state.borrow().clone())
+            .fpu_state(thread.fpu_state.borrow().clone()))
     }
 
     pub fn build(self, process_list: &mut ProcessList) -> Arc<Thread> {
@@ -234,7 +231,6 @@ impl ThreadBuilder {
             .fs_context
             .unwrap_or_else(|| FsContext::global().clone());
         let signal_list = self.signal_list.unwrap_or_else(|| SignalList::new());
-        let set_child_tid = self.set_child_tid.unwrap_or(0);
         let trap_ctx = self.trap_ctx.expect("TrapContext is not set");
         let fpu_state = self.fpu_state.unwrap_or_else(FpuState::new);
 
@@ -252,7 +248,8 @@ impl ThreadBuilder {
             inner: Spin::new(ThreadInner {
                 name,
                 tls: self.tls,
-                set_child_tid,
+                set_child_tid: self.set_child_tid,
+                clear_child_tid: self.clear_child_tid,
             }),
         });
 
@@ -285,28 +282,8 @@ impl Thread {
         }
     }
 
-    pub fn set_thread_area(&self, desc: &mut UserDescriptor) -> KResult<()> {
-        let mut inner = self.inner.lock();
-
-        // Clear the TLS area if it is not present.
-        if desc.flags.is_read_exec_only() && !desc.flags.is_present() {
-            if desc.limit == 0 || desc.base == 0 {
-                return Ok(());
-            }
-
-            let len = if desc.flags.is_limit_in_pages() {
-                (desc.limit as usize) << 12
-            } else {
-                desc.limit as usize
-            };
-
-            CheckedUserPointer::new(desc.base as _, len)?.zero()?;
-            return Ok(());
-        }
-
-        let (tls, entry) = UserTLS::new32(desc.base, desc.limit, desc.flags.is_limit_in_pages());
-        desc.entry = entry;
-        inner.tls = Some(tls);
+    pub fn set_user_tls(&self, tls: UserTLS) -> KResult<()> {
+        self.inner.lock().tls = Some(tls);
         Ok(())
     }
 
@@ -318,6 +295,18 @@ impl Thread {
         self.inner.lock().name.clone()
     }
 
+    pub fn clear_child_tid(&self, clear_child_tid: Option<usize>) {
+        self.inner.lock().clear_child_tid = clear_child_tid;
+    }
+
+    pub fn get_set_ctid(&self) -> Option<usize> {
+        self.inner.lock().set_child_tid
+    }
+
+    pub fn get_clear_ctid(&self) -> Option<usize> {
+        self.inner.lock().clear_child_tid
+    }
+
     pub fn handle_syscall(&self, no: usize, args: [usize; 6]) -> Option<usize> {
         match syscall_handlers().get(no) {
             Some(Some(SyscallHandler {
@@ -333,12 +322,27 @@ impl Thread {
         }
     }
 
+    pub async fn force_kill(&self, signal: Signal) {
+        let mut proc_list = ProcessList::get().write().await;
+        unsafe {
+            // SAFETY: Preemption is disabled.
+            proc_list
+                .do_exit(self, WaitType::Signaled(signal), false)
+                .await;
+        }
+    }
+
     pub fn is_dead(&self) -> bool {
         self.dead.load(Ordering::SeqCst)
     }
 
     async fn real_run(&self) {
-        let mut task_context = TaskContext::new();
+        if let Some(set_ctid) = self.get_set_ctid() {
+            UserPointerMut::new(set_ctid as *mut u32)
+                .expect("set_child_tid pointer is invalid")
+                .write(self.tid)
+                .expect("set_child_tid write failed");
+        }
 
         while !self.is_dead() {
             if self.signal_list.has_pending_signal() {
@@ -355,7 +359,7 @@ impl Thread {
 
             unsafe {
                 // SAFETY: We are returning to the context of the user thread.
-                self.trap_ctx.borrow().trap_return(&mut task_context);
+                self.trap_ctx.borrow().trap_return();
             }
 
             self.fpu_state.borrow().save();
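
The `set_child_tid`/`clear_child_tid` pair follows the usual CLONE_CHILD_SETTID/CLONE_CHILD_CLEARTID contract: the kernel stores the new thread's tid through one user pointer when the thread first returns to user space and zeroes (and, in a full implementation, futex-wakes) another when it exits. The sketch below models only that bookkeeping with plain atomics; `ChildTid`, `on_start`, and `on_exit` are invented names, not kernel APIs.

use std::sync::atomic::{AtomicU32, Ordering};

// Hypothetical stand-ins for the two user-space tid slots.
struct ChildTid<'a> {
    set_child_tid: Option<&'a AtomicU32>,
    clear_child_tid: Option<&'a AtomicU32>,
}

impl ChildTid<'_> {
    // On the child's first return to user space: publish its tid.
    fn on_start(&self, tid: u32) {
        if let Some(slot) = self.set_child_tid {
            slot.store(tid, Ordering::SeqCst);
        }
    }

    // On thread exit: zero the slot so pthread_join-style waiters can proceed
    // (a real kernel would also futex-wake the address).
    fn on_exit(&self) {
        if let Some(slot) = self.clear_child_tid {
            slot.store(0, Ordering::SeqCst);
        }
    }
}

fn main() {
    let tid_slot = AtomicU32::new(0);
    let ct = ChildTid { set_child_tid: Some(&tid_slot), clear_child_tid: Some(&tid_slot) };

    ct.on_start(43);
    assert_eq!(tid_slot.load(Ordering::SeqCst), 43);
    ct.on_exit();
    assert_eq!(tid_slot.load(Ordering::SeqCst), 0);
}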

+ 2 - 0
src/lib.rs

@@ -20,6 +20,7 @@ mod prelude;
 mod rcu;
 mod sync;
 
+use crate::kernel::task::alloc_pid;
 use alloc::{ffi::CString, sync::Arc};
 use core::{
     hint::spin_loop,
@@ -206,6 +207,7 @@ async fn init_process(early_kstack: PRange) {
 
     let mut process_list = Task::block_on(ProcessList::get().write());
     let (thread, process) = ProcessBuilder::new()
+        .pid(alloc_pid())
         .mm_list(load_info.mm_list)
         .thread_builder(thread_builder)
         .build(&mut process_list);
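
The init process now receives an explicitly allocated pid. Assuming `alloc_pid` is a simple monotonic counter (a guess for illustration; the kernel's actual allocator is not shown here), it could look roughly like this:

use core::sync::atomic::{AtomicU32, Ordering};

// Hypothetical monotonic pid allocator; pid 0 is left for the idle task.
static NEXT_PID: AtomicU32 = AtomicU32::new(1);

pub fn alloc_pid() -> u32 {
    NEXT_PID.fetch_add(1, Ordering::Relaxed)
}

fn main() {
    let init_pid = alloc_pid();
    assert_eq!(init_pid, 1); // init is conventionally pid 1
    assert!(alloc_pid() > init_pid);
}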

BIN
user-programs/pthread_test