Przeglądaj źródła

mm, proc: add an exited thread reaper

- thd.exit() will set thd.dead and send it to the reaper
- delay the release of process mm until we reap it
- extract futex logic out of exit and exec routine

Signed-off-by: greatbridf <greatbridf@icloud.com>
greatbridf 2 tygodni temu
rodzic
commit
b6d54d6a0f

+ 15 - 15
src/kernel/mem/mm_list.rs

@@ -398,20 +398,6 @@ impl MMList {
         assert_ne!(old_user_count, 0);
     }
 
-    /// Deactivate `self` and activate `to` with root page table changed only once.
-    /// This might reduce the overhead of switching page tables twice.
-    #[allow(dead_code)]
-    pub fn switch(&self, to: &Self) {
-        self.user_count.fetch_add(1, Ordering::Acquire);
-
-        let root_page_table = self.root_page_table.load(Ordering::Relaxed);
-        assert_ne!(root_page_table, 0);
-        set_root_page_table_pfn(PFN::from(PAddr::from(root_page_table)));
-
-        let old_user_count = to.user_count.fetch_sub(1, Ordering::Release);
-        assert_ne!(old_user_count, 0);
-    }
-
     /// Replace the current page table with a new one.
     ///
     /// # Safety
@@ -454,10 +440,24 @@ impl MMList {
 
         // TODO: Check whether we should wake someone up if they've been put
         //       to sleep when calling `vfork`.
-        self.inner
+        let old_mm = self
+            .inner
             .swap(new.map(|new_mm| new_mm.inner.swap(None)).flatten());
 
         eonix_preempt::enable();
+
+        // This could take long...
+        drop(old_mm);
+    }
+
+    pub fn release(&self) {
+        let old_mm = self.inner.swap(None);
+        let old_table = self.root_page_table.swap(0, Ordering::Relaxed);
+
+        // TODO: Remove this completely...
+        // XXX: `ArcSwap` is broken and never safe to use. Check `replace` above.
+        assert_ne!(old_table, 0, "Already released?");
+        assert!(old_mm.is_some(), "Already released?");
     }
 
     /// No need to do invalidation manually, `PageTable` already does it.

+ 7 - 19
src/kernel/syscall/procops.rs

@@ -22,8 +22,8 @@ use crate::kernel::constants::{
 use crate::kernel::mem::PageBuffer;
 use crate::kernel::syscall::{User, UserMut};
 use crate::kernel::task::{
-    do_clone, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags, FutexOp,
-    ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType,
+    do_clone, futex_exec, futex_wait, futex_wake, parse_futexop, yield_now, CloneArgs, FutexFlags,
+    FutexOp, ProcessList, ProgramLoader, RobustListHead, SignalAction, Thread, WaitId, WaitType,
 };
 use crate::kernel::timer::sleep;
 use crate::kernel::user::{UserBuffer, UserPointer, UserPointerMut, UserString};
@@ -213,10 +213,7 @@ async fn execve(exec: User<u8>, argv: User<PtrT>, envp: User<PtrT>) -> KResult<S
         .load()
         .await?;
 
-    if let Some(robust_list) = thread.get_robust_list() {
-        let _ = robust_list.wake_all().await;
-        thread.set_robust_list(None);
-    }
+    futex_exec(thread).await;
 
     unsafe {
         // SAFETY: We are doing execve, all other threads are terminated.
@@ -240,24 +237,15 @@ async fn execve(exec: User<u8>, argv: User<PtrT>, envp: User<PtrT>) -> KResult<S
 
 #[eonix_macros::define_syscall(SYS_EXIT)]
 async fn exit(status: u32) -> SyscallNoReturn {
-    let mut procs = ProcessList::get().write().await;
-
-    unsafe {
-        procs
-            .do_exit(&thread, WaitType::Exited(status), false)
-            .await;
-    }
+    thread.exit(WaitType::Exited(status));
 
     SyscallNoReturn
 }
 
 #[eonix_macros::define_syscall(SYS_EXIT_GROUP)]
 async fn exit_group(status: u32) -> SyscallNoReturn {
-    let mut procs = ProcessList::get().write().await;
-
-    unsafe {
-        procs.do_exit(&thread, WaitType::Exited(status), true).await;
-    }
+    // XXX: Send SIGKILL to our sibling threads.
+    thread.exit(WaitType::Exited(status));
 
     SyscallNoReturn
 }
@@ -856,7 +844,7 @@ async fn rt_sigreturn() -> KResult<SyscallNoReturn> {
             "`rt_sigreturn` failed in thread {} with error {err}!",
             thread.tid
         );
-        thread.force_kill(Signal::SIGSEGV).await;
+        thread.force_kill(Signal::SIGSEGV);
         return Err(err);
     }
 

+ 4 - 1
src/kernel/task.rs

@@ -12,7 +12,10 @@ mod user_tls;
 
 pub use clone::{do_clone, CloneArgs, CloneFlags};
 use eonix_hal::symbol_addr;
-pub use futex::{futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp, RobustListHead};
+pub use futex::{
+    futex_exec, futex_exit, futex_wait, futex_wake, parse_futexop, FutexFlags, FutexOp,
+    RobustListHead,
+};
 pub use kernel_stack::KernelStack;
 pub use loader::ProgramLoader;
 pub use process::{alloc_pid, Process, ProcessBuilder, WaitId, WaitObject, WaitType};

+ 43 - 10
src/kernel/task/futex.rs

@@ -1,19 +1,17 @@
-use core::pin::pin;
-
 use alloc::sync::Arc;
 use alloc::vec::Vec;
+use core::pin::pin;
+
 use bitflags::bitflags;
+use eonix_mm::address::Addr;
 use eonix_sync::{LazyLock, Mutex, MutexGuard, WaitList};
 use intrusive_collections::{intrusive_adapter, KeyAdapter, RBTree, RBTreeAtomicLink};
 
-use crate::{
-    kernel::{
-        constants::{EAGAIN, EINVAL},
-        syscall::User,
-        user::UserPointer,
-    },
-    prelude::KResult,
-};
+use super::Thread;
+use crate::kernel::constants::{EAGAIN, EINVAL};
+use crate::kernel::syscall::User;
+use crate::kernel::user::{UserPointer, UserPointerMut};
+use crate::prelude::KResult;
 
 #[derive(PartialEq, Debug, Clone, Copy)]
 #[repr(u32)]
@@ -318,3 +316,38 @@ impl RobustListHead {
         Ok(())
     }
 }
+
+async fn do_futex_exit(thread: &Thread) -> KResult<()> {
+    if let Some(clear_ctid) = thread.get_clear_ctid() {
+        UserPointerMut::new(clear_ctid)?.write(0u32)?;
+
+        futex_wake(clear_ctid.addr(), None, 1).await?;
+    }
+
+    if let Some(robust_list) = thread.get_robust_list() {
+        robust_list.wake_all().await?;
+    }
+
+    Ok(())
+}
+
+pub async fn futex_exit(thread: &Thread) {
+    // We don't care about any error happened inside.
+    // If they've set up a wrong pointer, good luck to them...
+    let _ = do_futex_exit(thread);
+}
+
+async fn do_futex_exec(thread: &Thread) -> KResult<()> {
+    if let Some(robust_list) = thread.get_robust_list() {
+        robust_list.wake_all().await?;
+        thread.set_robust_list(None);
+    }
+
+    Ok(())
+}
+
+pub async fn futex_exec(thread: &Thread) {
+    // We don't care about any error happened inside.
+    // If they've set up a wrong pointer, good luck to them...
+    let _ = do_futex_exec(thread);
+}

+ 90 - 54
src/kernel/task/process_list.rs

@@ -1,16 +1,17 @@
+use alloc::collections::btree_map::BTreeMap;
+use alloc::collections::vec_deque::VecDeque;
+use alloc::sync::{Arc, Weak};
+use core::pin::pin;
 use core::sync::atomic::Ordering;
 
-use super::{Process, ProcessGroup, Session, Thread, WaitObject, WaitType};
-use crate::{
-    kernel::{task::futex_wake, user::UserPointerMut},
-    rcu::rcu_sync,
-};
-use alloc::{
-    collections::btree_map::BTreeMap,
-    sync::{Arc, Weak},
+use eonix_runtime::scheduler::RUNTIME;
+use eonix_sync::{AsProof as _, AsProofMut as _, RwLock, Spin, WaitList};
+
+use super::loader::LoadInfo;
+use super::{
+    alloc_pid, Process, ProcessBuilder, ProcessGroup, Session, Thread, ThreadBuilder, WaitObject,
 };
-use eonix_mm::address::Addr;
-use eonix_sync::{AsProof as _, AsProofMut as _, RwLock};
+use crate::rcu::rcu_sync;
 
 pub struct ProcessList {
     /// The init process.
@@ -78,7 +79,7 @@ impl ProcessList {
         }
     }
 
-    pub fn set_init_process(&mut self, init: Arc<Process>) {
+    fn set_init_process(&mut self, init: Arc<Process>) {
         let old_init = self.init.replace(init);
         assert!(old_init.is_none(), "Init process already set");
     }
@@ -103,45 +104,66 @@ impl ProcessList {
         self.sessions.get(&sid).and_then(Weak::upgrade)
     }
 
-    /// Make the process a zombie and notify the parent.
-    /// # Safety
-    /// This function will destroy the process and all its threads.
-    /// It is the caller's responsibility to ensure that the process is not
-    /// running or will not run after this function is called.
-    pub async unsafe fn do_exit(
-        &mut self,
-        thread: &Thread,
-        exit_status: WaitType,
-        is_exiting_group: bool,
-    ) {
-        let process = thread.process.clone();
-
-        if process.pid == 1 {
-            panic!("init exited");
-        }
+    pub async fn sys_init(load_info: LoadInfo) {
+        let thread_builder = ThreadBuilder::new()
+            .name(Arc::from(&b"busybox"[..]))
+            .entry(load_info.entry_ip, load_info.sp);
 
-        let inner = process.inner.access_mut(self.prove_mut());
+        let mut process_list = ProcessList::get().write().await;
+        let (thread, process) = ProcessBuilder::new()
+            .pid(alloc_pid())
+            .mm_list(load_info.mm_list)
+            .thread_builder(thread_builder)
+            .build(&mut process_list);
 
-        thread.dead.store(true, Ordering::SeqCst);
+        process_list.set_init_process(process);
 
-        if is_exiting_group {
-            // TODO: Send SIGKILL to all threads.
-            // todo!()
-        }
+        // TODO!!!: Remove this.
+        thread.files.open_console();
 
-        if thread.tid != process.pid {
-            self.threads.remove(&thread.tid);
-            inner.threads.remove(&thread.tid).unwrap();
-        }
+        RUNTIME.spawn(Reaper::daemon());
+        RUNTIME.spawn(thread.run());
+    }
+
+    pub fn send_to_reaper(thread: Arc<Thread>) {
+        GLOBAL_REAPER.reap_list.lock().push_back(thread);
+        GLOBAL_REAPER.wait.notify_one();
+    }
+}
+
+struct Reaper {
+    reap_list: Spin<VecDeque<Arc<Thread>>>,
+    wait: WaitList,
+}
+
+static GLOBAL_REAPER: Reaper = Reaper {
+    reap_list: Spin::new(VecDeque::new()),
+    wait: WaitList::new(),
+};
 
-        if let Some(clear_ctid) = thread.get_clear_ctid() {
-            let _ = UserPointerMut::new(clear_ctid).unwrap().write(0u32);
+impl Reaper {
+    async fn reap(&self, thread: Arc<Thread>) {
+        let exit_status = thread
+            .exit_status
+            .lock()
+            .take()
+            .expect("Exited thread with no exit status");
 
-            let _ = futex_wake(clear_ctid.addr(), None, 1).await;
+        let process = &thread.process;
+
+        if process.pid == 1 && thread.tid == process.pid {
+            panic!("init exited");
         }
 
-        if let Some(robust_list) = thread.get_robust_list() {
-            let _ = robust_list.wake_all().await;
+        let mut procs = ProcessList::get().write().await;
+
+        let inner = process.inner.access_mut(procs.prove_mut());
+
+        thread.dead.store(true, Ordering::SeqCst);
+
+        if thread.tid != process.pid {
+            procs.threads.remove(&thread.tid);
+            inner.threads.remove(&thread.tid).unwrap();
         }
 
         // main thread exit
@@ -151,48 +173,62 @@ impl ProcessList {
             thread.files.close_all().await;
 
             // If we are the session leader, we should drop the control terminal.
-            if process.session(self.prove()).sid == process.pid {
-                if let Some(terminal) = process.session(self.prove()).drop_control_terminal().await
+            if process.session(procs.prove()).sid == process.pid {
+                if let Some(terminal) = process.session(procs.prove()).drop_control_terminal().await
                 {
                     terminal.drop_session().await;
                 }
             }
 
             // Release the MMList as well as the page table.
-            unsafe {
-                // SAFETY: We are exiting the process, so no one might be using it.
-                process.mm_list.replace(None);
-            }
+            process.mm_list.release();
 
             // Make children orphans (adopted by init)
             {
-                let init = self.init_process();
+                let init = procs.init_process();
                 inner.children.retain(|_, child| {
                     let child = child.upgrade().unwrap();
                     // SAFETY: `child.parent` must be ourself. So we don't need to free it.
                     unsafe { child.parent.swap(Some(init.clone())) };
-                    init.add_child(&child, self.prove_mut());
+                    init.add_child(&child, procs.prove_mut());
 
                     false
                 });
             }
 
-            let mut init_notify = self.init_process().notify_batch();
+            let mut init_notify = procs.init_process().notify_batch();
             process
                 .wait_list
                 .drain_exited()
                 .into_iter()
                 .for_each(|item| init_notify.notify(item));
-            init_notify.finish(self.prove());
+            init_notify.finish(procs.prove());
 
-            process.parent(self.prove()).notify(
+            process.parent(procs.prove()).notify(
                 process.exit_signal,
                 WaitObject {
                     pid: process.pid,
                     code: exit_status,
                 },
-                self.prove(),
+                procs.prove(),
             );
         }
     }
+
+    async fn daemon() {
+        let me = &GLOBAL_REAPER;
+
+        loop {
+            let mut wait = pin!(me.wait.prepare_to_wait());
+            wait.as_mut().add_to_wait_list();
+
+            let thd_to_reap = me.reap_list.lock().pop_front();
+            if let Some(thd_to_reap) = thd_to_reap {
+                me.reap(thd_to_reap).await;
+                continue;
+            }
+
+            wait.await;
+        }
+    }
 }

+ 10 - 10
src/kernel/task/signal.rs

@@ -1,11 +1,10 @@
 mod signal_action;
 
-use super::{ProcessList, Thread, WaitObject, WaitType};
-use crate::kernel::constants::{EFAULT, EINVAL};
-use crate::{kernel::user::UserPointer, prelude::*};
 use alloc::collections::binary_heap::BinaryHeap;
 use alloc::sync::Arc;
-use core::{cmp::Reverse, task::Waker};
+use core::cmp::Reverse;
+use core::task::Waker;
+
 use eonix_hal::fpu::FpuState;
 use eonix_hal::traits::trap::RawTrapContext;
 use eonix_hal::trap::TrapContext;
@@ -14,9 +13,13 @@ use eonix_sync::AsProof as _;
 use intrusive_collections::UnsafeRef;
 use posix_types::signal::{SigSet, Signal};
 use posix_types::{SIGNAL_IGNORE, SIGNAL_NOW, SIGNAL_STOP};
+pub use signal_action::SignalAction;
 use signal_action::SignalActionList;
 
-pub use signal_action::SignalAction;
+use super::{ProcessList, Thread, WaitObject, WaitType};
+use crate::kernel::constants::{EFAULT, EINVAL};
+use crate::kernel::user::UserPointer;
+use crate::prelude::*;
 
 pub(self) const SAVED_DATA_SIZE: usize =
     size_of::<TrapContext>() + size_of::<FpuState>() + size_of::<SigSet>();
@@ -168,10 +171,7 @@ impl SignalList {
     pub async fn handle(&self, trap_ctx: &mut TrapContext, fpu_state: &mut FpuState) {
         loop {
             let signal = {
-                let signal = match self.inner.lock().pop() {
-                    Some(signal) => signal,
-                    None => return,
-                };
+                let Some(signal) = self.inner.lock().pop() else { return };
 
                 let handler = self.inner.lock().actions.get(signal);
                 if let SignalAction::SimpleHandler { mask, .. } = &handler {
@@ -246,7 +246,7 @@ impl SignalList {
                 }
                 signal => {
                     // Default to terminate the thread.
-                    Thread::current().force_kill(signal).await;
+                    Thread::current().force_kill(signal);
                     return;
                 }
             }

+ 38 - 11
src/kernel/task/thread.rs

@@ -24,8 +24,7 @@ use super::{stackful, Process, ProcessList, WaitType};
 use crate::kernel::interrupt::default_irq_handler;
 use crate::kernel::syscall::{syscall_handlers, SyscallHandler, User, UserMut};
 use crate::kernel::task::clone::CloneArgs;
-use crate::kernel::task::futex::RobustListHead;
-use crate::kernel::task::CloneFlags;
+use crate::kernel::task::{futex_exit, CloneFlags, RobustListHead};
 use crate::kernel::timer::{should_reschedule, timer_interrupt};
 use crate::kernel::user::{UserPointer, UserPointerMut};
 use crate::kernel::vfs::filearray::FileArray;
@@ -83,6 +82,7 @@ pub struct Thread {
     pub fpu_state: AtomicUniqueRefCell<FpuState>,
 
     pub dead: AtomicBool,
+    pub exit_status: Spin<Option<WaitType>>,
 
     inner: Spin<ThreadInner>,
 }
@@ -240,6 +240,7 @@ impl ThreadBuilder {
             trap_ctx: AtomicUniqueRefCell::new(trap_ctx),
             fpu_state: AtomicUniqueRefCell::new(fpu_state),
             dead: AtomicBool::new(false),
+            exit_status: Spin::new(None),
             inner: Spin::new(ThreadInner {
                 name,
                 tls: self.tls,
@@ -331,18 +332,26 @@ impl Thread {
         }
     }
 
-    pub async fn force_kill(&self, signal: Signal) {
-        let mut proc_list = ProcessList::get().write().await;
-        unsafe {
-            // SAFETY: Preemption is disabled.
-            proc_list
-                .do_exit(self, WaitType::Signaled(signal), false)
-                .await;
+    pub fn exit(&self, exit_status: WaitType) {
+        {
+            let mut self_status = self.exit_status.lock();
+            if self_status.is_some() {
+                // Someone has got here before us.
+                return;
+            }
+
+            *self_status = Some(exit_status);
         }
+
+        self.dead.store(true, Ordering::Release);
+    }
+
+    pub fn force_kill(&self, signal: Signal) {
+        self.exit(WaitType::Signaled(signal));
     }
 
     pub fn is_dead(&self) -> bool {
-        self.dead.load(Ordering::SeqCst)
+        self.dead.load(Ordering::Acquire)
     }
 
     async fn real_run(&self) {
@@ -385,6 +394,10 @@ impl Thread {
                     error_code,
                     address: addr,
                 }) => {
+                    if self.is_dead() {
+                        return;
+                    }
+
                     let mms = &self.process.mm_list;
                     if let Err(signal) = mms.handle_user_page_fault(addr, error_code).await {
                         self.signal_list.raise(signal);
@@ -407,6 +420,10 @@ impl Thread {
                     }
                 }
                 TrapType::Syscall { no, args } => {
+                    if self.is_dead() {
+                        return;
+                    }
+
                     if let Some(retval) = self.handle_syscall(thd_alloc, no, args).await {
                         let mut trap_ctx = self.trap_ctx.borrow();
                         trap_ctx.set_user_return_value(retval);
@@ -447,7 +464,17 @@ impl Thread {
     }
 
     pub fn run(self: Arc<Thread>) -> impl Future<Output = ()> + Send + 'static {
-        async move { self.contexted(stackful(self.real_run())).await }
+        async move {
+            self.contexted(async {
+                stackful(self.real_run()).await;
+
+                futex_exit(&self).await;
+            })
+            .await;
+
+            assert!(self.is_dead(), "`real_run` returned before the thread die?");
+            ProcessList::send_to_reaper(self);
+        }
     }
 }
 

+ 2 - 21
src/lib.rs

@@ -31,7 +31,6 @@ mod rcu;
 mod sync;
 
 use alloc::ffi::CString;
-use alloc::sync::Arc;
 use core::hint::spin_loop;
 use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 
@@ -46,7 +45,7 @@ use eonix_mm::address::PRange;
 use eonix_runtime::executor::Stack;
 use eonix_runtime::scheduler::RUNTIME;
 use kernel::mem::GlobalPageAlloc;
-use kernel::task::{KernelStack, ProcessBuilder, ProcessList, ProgramLoader, ThreadBuilder};
+use kernel::task::{KernelStack, ProcessList, ProgramLoader};
 use kernel::vfs::dentry::Dentry;
 use kernel::vfs::mount::{do_mount, MS_NOATIME, MS_NODEV, MS_NOSUID, MS_RDONLY};
 use kernel::vfs::types::Permission;
@@ -56,8 +55,6 @@ use kernel_init::setup_memory;
 use path::Path;
 use prelude::*;
 
-use crate::kernel::task::alloc_pid;
-
 #[cfg(any(target_arch = "riscv64", target_arch = "loongarch64"))]
 fn do_panic() -> ! {
     #[cfg(target_arch = "riscv64")]
@@ -276,21 +273,5 @@ async fn init_process(early_kstack: PRange) {
             .expect("Failed to load init program")
     };
 
-    let thread_builder = ThreadBuilder::new()
-        .name(Arc::from(&b"busybox"[..]))
-        .entry(load_info.entry_ip, load_info.sp);
-
-    let mut process_list = ProcessList::get().write().await;
-    let (thread, process) = ProcessBuilder::new()
-        .pid(alloc_pid())
-        .mm_list(load_info.mm_list)
-        .thread_builder(thread_builder)
-        .build(&mut process_list);
-
-    process_list.set_init_process(process);
-
-    // TODO!!!: Remove this.
-    thread.files.open_console();
-
-    RUNTIME.spawn(thread.run());
+    ProcessList::sys_init(load_info).await;
 }