Browse Source

fix: deadlocks that happen on task switches

We might be holding the `MMListInner` lock in some operation when a
task switch occurs. Then, in the idle task, we try to switch the page
table, which needs that same lock. But that's impossible, because in
the idle task we shouldn't do anything that could possibly put us into
a sleeping state.

Also, when we return from `schedule()` in `CondVar::wait()`, we might
have been woken up by a signal. We should remove ourselves from the
waiters queue. Otherwise, we might be accidentally woken up later by
some other task and end up in an inconsistent task scheduler state.
greatbridf 4 months ago
parent
commit
6612cf8b21

+ 75 - 78
src/kernel/mem/mm_list.rs

@@ -35,7 +35,6 @@ pub enum Mapping {
 #[derive(Debug)]
 struct MMListInner {
     areas: BTreeSet<MMArea>,
-    page_table: PageTable,
     break_start: Option<VRange>,
     break_pos: Option<VAddr>,
 }
@@ -45,6 +44,8 @@ pub struct MMList {
     /// # Safety
     /// This field might be used in IRQ context, so it should be locked with `lock_irq()`.
     inner: Mutex<MMListInner>,
+    /// Do not modify entries in the page table without acquiring the `inner` lock.
+    page_table: PageTable,
 }
 
 impl FileMapping {
@@ -71,15 +72,6 @@ impl FileMapping {
 }
 
 impl MMListInner {
-    fn clear_user(&mut self) {
-        self.areas.retain(|area| {
-            self.page_table.unmap(area);
-            false
-        });
-        self.break_start = None;
-        self.break_pos = None;
-    }
-
     fn overlapping_addr(&self, addr: VAddr) -> Option<&MMArea> {
         self.areas.get(&VRange::from(addr))
     }
@@ -118,7 +110,7 @@ impl MMListInner {
         }
     }
 
-    fn unmap(&mut self, start: VAddr, len: usize) -> KResult<()> {
+    fn unmap(&mut self, page_table: &PageTable, start: VAddr, len: usize) -> KResult<()> {
         assert_eq!(start.floor(), start);
         let end = (start + len).ceil();
         let range = VRange::new(start, end);
@@ -136,7 +128,7 @@ impl MMListInner {
             }
             if area.range() == range.start().into() {
                 let (left, right) = area.clone().split(range.start());
-                self.page_table.unmap(&right.unwrap());
+                page_table.unmap(&right.unwrap());
 
                 if let Some(left) = left {
                     assert!(
@@ -146,7 +138,7 @@ impl MMListInner {
                 }
             } else if area.range() == range.end().into() {
                 let (left, right) = area.clone().split(range.end());
-                self.page_table.unmap(&left.unwrap());
+                page_table.unmap(&left.unwrap());
 
                 assert!(
                     back_remaining
@@ -155,7 +147,7 @@ impl MMListInner {
                     "There should be only one `back`."
                 );
             } else {
-                self.page_table.unmap(area);
+                page_table.unmap(area);
             }
 
             false
@@ -173,6 +165,7 @@ impl MMListInner {
 
     fn mmap(
         &mut self,
+        page_table: &PageTable,
         at: VAddr,
         len: usize,
         mapping: Mapping,
@@ -188,61 +181,13 @@ impl MMListInner {
         }
 
         match &mapping {
-            Mapping::Anonymous => self.page_table.set_anonymous(range, permission),
-            Mapping::File(_) => self.page_table.set_mmapped(range, permission),
+            Mapping::Anonymous => page_table.set_anonymous(range, permission),
+            Mapping::File(_) => page_table.set_mmapped(range, permission),
         }
 
         self.areas.insert(MMArea::new(range, mapping, permission));
         Ok(())
     }
-
-    fn grow(&self, area: &MMArea, len: usize) {
-        self.page_table.set_anonymous(
-            VRange::from(area.range().end()).grow(len),
-            Permission {
-                write: true,
-                execute: false,
-            },
-        );
-
-        area.grow(len);
-    }
-
-    fn set_break(&mut self, pos: Option<VAddr>) -> VAddr {
-        // SAFETY: `set_break` is only called in syscalls, where program break should be valid.
-        assert!(self.break_start.is_some() && self.break_pos.is_some());
-        let break_start = self.break_start.unwrap();
-        let current_break = self.break_pos.unwrap();
-        let pos = match pos {
-            None => return current_break,
-            Some(pos) => pos.ceil(),
-        };
-
-        let range = VRange::new(current_break, pos);
-        if !self.check_overlapping_range(range) {
-            return current_break;
-        }
-
-        if !self.areas.contains(&break_start) {
-            self.areas.insert(MMArea::new(
-                break_start,
-                Mapping::Anonymous,
-                Permission {
-                    write: true,
-                    execute: false,
-                },
-            ));
-        }
-
-        let program_break = self
-            .areas
-            .get(&break_start)
-            .expect("Program break area should be valid");
-
-        self.grow(program_break, pos - current_break);
-        self.break_pos = Some(pos);
-        pos
-    }
 }
 
 impl MMList {
@@ -250,10 +195,10 @@ impl MMList {
         Arc::new(Self {
             inner: Mutex::new(MMListInner {
                 areas: BTreeSet::new(),
-                page_table: PageTable::new(),
                 break_start: None,
                 break_pos: None,
             }),
+            page_table: PageTable::new(),
         })
     }
 
@@ -263,10 +208,10 @@ impl MMList {
         let list = Arc::new(Self {
             inner: Mutex::new(MMListInner {
                 areas: inner.areas.clone(),
-                page_table: PageTable::new(),
                 break_start: inner.break_start,
                 break_pos: inner.break_pos,
             }),
+            page_table: PageTable::new(),
         });
 
         // SAFETY: `self.inner` already locked with IRQ disabled.
@@ -274,8 +219,8 @@ impl MMList {
             let list_inner = list.inner.lock();
 
             for area in list_inner.areas.iter() {
-                let new_iter = list_inner.page_table.iter_user(area.range()).unwrap();
-                let old_iter = inner.page_table.iter_user(area.range()).unwrap();
+                let new_iter = list.page_table.iter_user(area.range()).unwrap();
+                let old_iter = self.page_table.iter_user(area.range()).unwrap();
 
                 for (new, old) in new_iter.zip(old_iter) {
                     new.setup_cow(old);
@@ -284,23 +229,29 @@ impl MMList {
         }
 
         // We set some pages as COW, so we need to invalidate TLB.
-        inner.page_table.lazy_invalidate_tlb_all();
+        self.page_table.lazy_invalidate_tlb_all();
 
         list
     }
 
     /// No need to do invalidation manually, `PageTable` already does it.
     pub fn clear_user(&self) {
-        self.inner.lock_irq().clear_user()
+        let mut inner = self.inner.lock_irq();
+        inner.areas.retain(|area| {
+            self.page_table.unmap(area);
+            false
+        });
+        inner.break_start = None;
+        inner.break_pos = None;
     }
 
     pub fn switch_page_table(&self) {
-        self.inner.lock_irq().page_table.switch();
+        self.page_table.switch();
     }
 
     /// No need to do invalidation manually, `PageTable` already does it.
     pub fn unmap(&self, start: VAddr, len: usize) -> KResult<()> {
-        self.inner.lock_irq().unmap(start, len)
+        self.inner.lock_irq().unmap(&self.page_table, start, len)
     }
 
     pub fn mmap_hint(
@@ -313,15 +264,15 @@ impl MMList {
         let mut inner = self.inner.lock_irq();
         if hint == VAddr::NULL {
             let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-            inner.mmap(at, len, mapping, permission)?;
+            inner.mmap(&self.page_table, at, len, mapping, permission)?;
             return Ok(at);
         }
 
-        match inner.mmap(hint, len, mapping.clone(), permission) {
+        match inner.mmap(&self.page_table, hint, len, mapping.clone(), permission) {
             Ok(()) => Ok(hint),
             Err(EEXIST) => {
                 let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-                inner.mmap(at, len, mapping, permission)?;
+                inner.mmap(&self.page_table, at, len, mapping, permission)?;
                 Ok(at)
             }
             Err(err) => Err(err),
@@ -335,12 +286,58 @@ impl MMList {
         mapping: Mapping,
         permission: Permission,
     ) -> KResult<VAddr> {
-        let mut inner = self.inner.lock_irq();
-        inner.mmap(at, len, mapping.clone(), permission).map(|_| at)
+        self.inner
+            .lock_irq()
+            .mmap(&self.page_table, at, len, mapping.clone(), permission)
+            .map(|_| at)
     }
 
     pub fn set_break(&self, pos: Option<VAddr>) -> VAddr {
-        self.inner.lock_irq().set_break(pos)
+        let mut inner = self.inner.lock_irq();
+
+        // SAFETY: `set_break` is only called in syscalls, where program break should be valid.
+        assert!(inner.break_start.is_some() && inner.break_pos.is_some());
+        let break_start = inner.break_start.unwrap();
+        let current_break = inner.break_pos.unwrap();
+        let pos = match pos {
+            None => return current_break,
+            Some(pos) => pos.ceil(),
+        };
+
+        let range = VRange::new(current_break, pos);
+        if !inner.check_overlapping_range(range) {
+            return current_break;
+        }
+
+        if !inner.areas.contains(&break_start) {
+            inner.areas.insert(MMArea::new(
+                break_start,
+                Mapping::Anonymous,
+                Permission {
+                    write: true,
+                    execute: false,
+                },
+            ));
+        }
+
+        let program_break = inner
+            .areas
+            .get(&break_start)
+            .expect("Program break area should be valid");
+
+        let len = pos - current_break;
+        self.page_table.set_anonymous(
+            VRange::from(program_break.range().end()).grow(len),
+            Permission {
+                write: true,
+                execute: false,
+            },
+        );
+
+        program_break.grow(len);
+
+        inner.break_pos = Some(pos);
+        pos
     }
 
     /// This should be called only **once** for every thread.

+ 6 - 2
src/kernel/mem/mm_list/page_fault.rs

@@ -62,7 +62,7 @@ impl MMList {
             }
         }
 
-        let pte = inner
+        let pte = self
             .page_table
             .iter_user(VRange::new(addr.floor(), addr.floor() + 0x1000))
             .unwrap()
@@ -196,7 +196,11 @@ pub fn handle_page_fault(int_stack: &mut interrupt_stack) {
         .handle_page_fault(int_stack, vaddr, error);
 
     if let Err(signal) = result {
-        println_debug!("Page fault on {:#x} in user space at {:#x}", vaddr.0, int_stack.v_rip);
+        println_debug!(
+            "Page fault on {:#x} in user space at {:#x}",
+            vaddr.0,
+            int_stack.v_rip
+        );
         ProcessList::kill_current(signal)
     }
 }

+ 11 - 9
src/kernel/task/scheduler.rs

@@ -82,7 +82,7 @@ impl Scheduler {
 
     pub fn usleep(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
-        assert!(matches!(*state, ThreadState::Running));
+        assert_eq!(*state, ThreadState::Running);
         // No need to dequeue. We have proved that the thread is running so not in the queue.
 
         *state = ThreadState::USleep;
@@ -90,7 +90,7 @@ impl Scheduler {
 
     pub fn uwake(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
-        assert!(matches!(*state, ThreadState::USleep));
+        assert_eq!(*state, ThreadState::USleep);
 
         *state = ThreadState::Ready;
         self.enqueue(&thread);
@@ -98,7 +98,7 @@ impl Scheduler {
 
     pub fn isleep(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
-        assert!(matches!(*state, ThreadState::Running));
+        assert_eq!(*state, ThreadState::Running);
         // No need to dequeue. We have proved that the thread is running so not in the queue.
 
         *state = ThreadState::ISleep;
@@ -113,14 +113,14 @@ impl Scheduler {
                 *state = ThreadState::Ready;
                 self.enqueue(&thread);
             }
-            _ => panic!(),
+            state => panic!("Invalid transition from state {:?} to `Ready`", state),
         }
     }
 
     /// Put `Running` thread into `Ready` state and enqueue the task.
     pub fn put_ready(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
-        assert!(matches!(*state, ThreadState::Running));
+        assert_eq!(*state, ThreadState::Running);
 
         *state = ThreadState::Ready;
         self.enqueue(&thread);
@@ -129,7 +129,7 @@ impl Scheduler {
     /// Set `Ready` threads to the `Running` state.
     pub fn set_running(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
-        assert!(matches!(*state, ThreadState::Ready));
+        assert_eq!(*state, ThreadState::Ready);
 
         *state = ThreadState::Running;
         // No need to dequeue. We got the thread from the queue.
@@ -138,7 +138,7 @@ impl Scheduler {
     /// Set `Running` threads to the `Zombie` state.
     pub fn set_zombie(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
-        assert!(matches!(*state, ThreadState::Running));
+        assert_eq!(*state, ThreadState::Running);
 
         *state = ThreadState::Zombie;
     }
@@ -181,11 +181,13 @@ fn context_switch_light(from: &Arc<Thread>, to: &Arc<Thread>) {
 /// In this function, we should see `preempt_count == 1`.
 extern "C" fn idle_task() {
     loop {
+        debug_assert_eq!(preempt::count(), 1);
+
         // SAFETY: No need to call `lock_irq`, preempt is already disabled.
         let mut scheduler = Scheduler::get().lock();
         let state = *Thread::current().state.lock();
 
-        // Previous thread is `Running`
+        // Previous thread is `Running`.
         if let ThreadState::Running = state {
             // No other thread to run, return to current running thread without changing its state.
             if scheduler.ready.is_empty() {
@@ -194,7 +196,7 @@ extern "C" fn idle_task() {
                 continue;
             } else {
                 // Put it into `Ready` state
-                scheduler.put_ready(&Thread::current());
+                scheduler.put_ready(Thread::current());
             }
         }
 

+ 55 - 78
src/kernel/task/signal.rs

@@ -69,7 +69,8 @@ struct SignalListInner {
 
 #[derive(Debug, Clone)]
 pub struct SignalList {
-    inner: Mutex<SignalListInner>,
+    /// We might use this inside interrupt handler, so we need to use `lock_irq`.
+    inner: Spin<SignalListInner>,
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -155,16 +156,13 @@ impl SignalAction {
         self.sa_handler == SIG_DFL
     }
 
-    /// # Return
-    /// `(new_ip, new_sp)`
-    ///
     /// # Might Sleep
     fn handle(
         &self,
         signum: u32,
         int_stack: &mut interrupt_stack,
         mmxregs: &mut mmx_registers,
-    ) -> KResult<(usize, usize)> {
+    ) -> KResult<()> {
         if self.sa_flags & SA_RESTORER as usize == 0 {
             return Err(EINVAL);
         }
@@ -187,7 +185,9 @@ impl SignalAction {
         stack.copy(mmxregs)?.ok_or(EFAULT)?; // MMX registers
         stack.copy(int_stack)?.ok_or(EFAULT)?; // Interrupt stack
 
-        Ok((self.sa_handler, sp))
+        int_stack.v_rip = self.sa_handler;
+        int_stack.rsp = sp;
+        Ok(())
     }
 }
 
@@ -248,7 +248,7 @@ impl SignalListInner {
 impl SignalList {
     pub fn new() -> Self {
         Self {
-            inner: Mutex::new(SignalListInner {
+            inner: Spin::new(SignalListInner {
                 mask: 0,
                 pending: BinaryHeap::new(),
                 handlers: BTreeMap::new(),
@@ -257,19 +257,19 @@ impl SignalList {
     }
 
     pub fn get_mask(&self) -> u64 {
-        self.inner.lock().get_mask()
+        self.inner.lock_irq().get_mask()
     }
 
     pub fn set_mask(&self, mask: u64) {
-        self.inner.lock().set_mask(mask)
+        self.inner.lock_irq().set_mask(mask)
     }
 
     pub fn mask(&self, mask: u64) {
-        self.inner.lock().set_mask(mask)
+        self.inner.lock_irq().set_mask(mask)
     }
 
     pub fn unmask(&self, mask: u64) {
-        self.inner.lock().unmask(mask)
+        self.inner.lock_irq().unmask(mask)
     }
 
     pub fn set_handler(&self, signal: Signal, action: &SignalAction) -> KResult<()> {
@@ -277,7 +277,7 @@ impl SignalList {
             return Err(EINVAL);
         }
 
-        let mut inner = self.inner.lock();
+        let mut inner = self.inner.lock_irq();
         if action.is_default() {
             inner.handlers.remove(&signal);
         } else {
@@ -289,7 +289,7 @@ impl SignalList {
 
     pub fn get_handler(&self, signal: Signal) -> SignalAction {
         self.inner
-            .lock()
+            .lock_irq()
             .handlers
             .get(&signal)
             .cloned()
@@ -300,7 +300,7 @@ impl SignalList {
     /// This is used when `execve` is called.
     pub fn clear_non_ignore(&self) {
         self.inner
-            .lock()
+            .lock_irq()
             .handlers
             .retain(|_, action| action.is_ignore());
     }
@@ -308,87 +308,64 @@ impl SignalList {
     /// Clear all pending signals.
     /// This is used when `fork` is called.
     pub fn clear_pending(&self) {
-        self.inner.lock().pending.clear()
+        self.inner.lock_irq().pending.clear()
     }
 
     pub fn has_pending_signal(&self) -> bool {
-        !self.inner.lock().pending.is_empty()
+        !self.inner.lock_irq().pending.is_empty()
     }
 
     /// Do not use this, use `Thread::raise` instead.
     pub(super) fn raise(&self, signal: Signal) -> RaiseResult {
-        self.inner.lock().raise(signal)
+        self.inner.lock_irq().raise(signal)
     }
 
+    /// Handle signals in the context of `Thread::current()`.
+    ///
     /// # Safety
     /// This function might never return. Caller must make sure that local variables
     /// that own resources are dropped before calling this function.
-    ///
-    /// # Return
-    /// `(new_ip, new_sp)`
-    pub fn handle(
-        &self,
-        int_stack: &mut interrupt_stack,
-        mmxregs: &mut mmx_registers,
-    ) -> Option<(usize, usize)> {
-        let mut inner = self.inner.lock();
-
+    pub fn handle(&self, int_stack: &mut interrupt_stack, mmxregs: &mut mmx_registers) {
         loop {
-            let signal = match self.inner.lock().pop() {
-                Some(signal) => signal,
-                None => return None,
-            };
-
-            if signal.is_now() {
-                match signal {
-                    Signal::SIGKILL => terminate_process(signal),
-                    Signal::SIGSTOP => {
-                        Thread::current().do_stop();
-                        inner.unmask(signal.to_mask());
+            let signal = {
+                let mut inner = self.inner.lock_irq();
+                let signal = match inner.pop() {
+                    Some(signal) => signal,
+                    None => return,
+                };
+
+                if let Some(handler) = inner.handlers.get(&signal) {
+                    if !signal.is_now() {
+                        let result = handler.handle(signal.to_signum(), int_stack, mmxregs);
+                        match result {
+                            Err(EFAULT) => inner.raise(Signal::SIGSEGV),
+                            Err(_) => inner.raise(Signal::SIGSYS),
+                            Ok(()) => return,
+                        };
+                        continue;
                     }
-                    _ => unreachable!(),
                 }
-            }
 
-            match inner.handlers.get(&signal) {
-                // Default action
-                None => {
-                    match signal {
-                        s if s.is_continue() => {
-                            Thread::current().do_continue();
-                            inner.unmask(signal.to_mask());
-                            return None;
-                        }
-                        s if s.is_stop() => {
-                            Thread::current().do_stop();
-                            inner.unmask(signal.to_mask());
-                            continue;
-                        }
-                        s if s.is_coredump() => terminate_process_core_dump(signal),
-                        s if !s.is_ignore() => terminate_process(signal),
-                        _ => continue, // Ignore
-                    }
-                }
-                Some(handler) => {
-                    let result = handler.handle(signal.to_signum(), int_stack, mmxregs);
-                    match result {
-                        Err(EFAULT) => inner.raise(Signal::SIGSEGV),
-                        Err(_) => inner.raise(Signal::SIGSYS),
-                        Ok((ip, sp)) => return Some((ip, sp)),
-                    };
-                    continue;
-                }
+                // Default actions include stopping the thread, continuing the thread and
+                // terminating the process. All these actions will block the thread or return
+                // to the thread immediately. So we can unmask these signals now.
+                inner.unmask(signal.to_mask());
+                signal
+            };
+
+            // Default actions.
+            match signal {
+                Signal::SIGSTOP => Thread::current().do_stop(),
+                Signal::SIGCONT => Thread::current().do_continue(),
+                Signal::SIGKILL => ProcessList::kill_current(signal),
+                // Ignored
+                Signal::SIGCHLD | Signal::SIGURG | Signal::SIGWINCH => continue,
+                // "Soft" stops.
+                Signal::SIGTSTP | Signal::SIGTTIN | Signal::SIGTTOU => Thread::current().do_stop(),
+                // TODO!!!!!!: Check exit status format.
+                s if s.is_coredump() => ProcessList::kill_current(signal),
+                signal => ProcessList::kill_current(signal),
             }
         }
     }
 }
-
-// TODO!!!: Should we use `uwake` or `iwake`?
-fn terminate_process(signal: Signal) -> ! {
-    ProcessList::kill_current(signal)
-}
-
-// TODO!!!!!!: Check exit status format.
-fn terminate_process_core_dump(signal: Signal) -> ! {
-    ProcessList::kill_current(signal)
-}

+ 1 - 1
src/kernel/task/thread.rs

@@ -33,7 +33,7 @@ use super::{
     KernelStack, Scheduler,
 };
 
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ThreadState {
     Preparing,
     Running,

+ 0 - 4
src/kernel/timer.rs

@@ -30,13 +30,9 @@ fn timer_interrupt() {
     TICKS.fetch_add(1, Ordering::Relaxed);
     if preempt::count() == 0 {
         println_debug!("Timer interrupt reschedule");
-        preempt::disable();
-
         // To make scheduler satisfied.
         preempt::disable();
         Scheduler::schedule();
-
-        preempt::enable();
     }
 }
 

+ 4 - 0
src/sync/condvar.rs

@@ -85,5 +85,9 @@ impl<const I: bool> CondVar<I> {
         unsafe { guard.force_unlock() };
         Scheduler::schedule();
         unsafe { guard.force_relock() };
+
+        self.waiters
+            .lock_irq()
+            .retain(|waiter| waiter != Thread::current());
     }
 }