浏览代码

feat: add percpu readyqueue

greatbridf 10 月之前
父节点
当前提交
c6f71ffe15

+ 5 - 1
Cargo.toml

@@ -15,7 +15,11 @@ spin = "0.9.8"
 
 [features]
 default = ["smp"]
-debug_syscall = []
+trace_condvar = []
+trace_syscall = []
+trace_scheduler = []
+log_trace = ["trace_condvar", "trace_syscall", "trace_scheduler"]
+log_debug = []
 smp = []
 
 [build-dependencies]

+ 143 - 143
arch/src/x86_64/interrupt.rs

@@ -34,161 +34,161 @@ global_asm!(
     .set SS, 0xa8
 
     .macro movcfi reg, offset
-    	mov \reg, \offset(%rsp)
-    	.cfi_rel_offset \reg, \offset
+        mov \reg, \offset(%rsp)
+        .cfi_rel_offset \reg, \offset
     .endm
 
     .macro movrst reg, offset
-    	mov \offset(%rsp), \reg
-    	.cfi_restore \reg
+        mov \offset(%rsp), \reg
+        .cfi_restore \reg
     .endm
 
     .globl ISR_stub_restore
     .type ISR_stub_restore @function
 
     ISR_stub:
-    	.cfi_startproc
-    	.cfi_signal_frame
-    	.cfi_def_cfa_offset 0x18
-    	.cfi_offset %rsp, 0x10
-
-    	cmpq $0x08, 24(%rsp)
-    	je 1f
-    	swapgs
-    
+        .cfi_startproc
+        .cfi_signal_frame
+        .cfi_def_cfa_offset 0x18
+        .cfi_offset %rsp, 0x10
+
+        cmpq $0x08, 24(%rsp)
+        je 1f
+        swapgs
+
     1:
-    	sub $0x78, %rsp
-    	.cfi_def_cfa_offset 0x90
-    
-    	movcfi %rax, RAX
-    	movcfi %rbx, RBX
-    	movcfi %rcx, RCX
-    	movcfi %rdx, RDX
-    	movcfi %rdi, RDI
-    	movcfi %rsi, RSI
-    	movcfi %r8,  R8
-    	movcfi %r9,  R9
-    	movcfi %r10, R10
-    	movcfi %r11, R11
-    	movcfi %r12, R12
-    	movcfi %r13, R13
-    	movcfi %r14, R14
-    	movcfi %r15, R15
-    	movcfi %rbp, RBP
-    
-    	mov INT_NO(%rsp), %rax
-    	sub $ISR0, %rax
-    	shr $3, %rax
-    	mov %rax, INT_NO(%rsp)
-    
-    	mov %rsp, %rbx
-    	.cfi_def_cfa_register %rbx
-    
-    	and $~0xf, %rsp
-    	sub $512, %rsp
-    	fxsave (%rsp)
-    
-    	mov %rbx, %rdi
-    	mov %rsp, %rsi
-    	call interrupt_handler
-    
+        sub $0x78, %rsp
+        .cfi_def_cfa_offset 0x90
+
+        movcfi %rax, RAX
+        movcfi %rbx, RBX
+        movcfi %rcx, RCX
+        movcfi %rdx, RDX
+        movcfi %rdi, RDI
+        movcfi %rsi, RSI
+        movcfi %r8,  R8
+        movcfi %r9,  R9
+        movcfi %r10, R10
+        movcfi %r11, R11
+        movcfi %r12, R12
+        movcfi %r13, R13
+        movcfi %r14, R14
+        movcfi %r15, R15
+        movcfi %rbp, RBP
+
+        mov INT_NO(%rsp), %rax
+        sub $ISR0, %rax
+        shr $3, %rax
+        mov %rax, INT_NO(%rsp)
+
+        mov %rsp, %rbx
+        .cfi_def_cfa_register %rbx
+
+        and $~0xf, %rsp
+        sub $512, %rsp
+        fxsave (%rsp)
+
+        mov %rbx, %rdi
+        mov %rsp, %rsi
+        call interrupt_handler
+
     ISR_stub_restore:
-    	fxrstor (%rsp)
-    	mov %rbx, %rsp
-    	.cfi_def_cfa_register %rsp
-    
-    	movrst %rax, RAX
-    	movrst %rbx, RBX
-    	movrst %rcx, RCX
-    	movrst %rdx, RDX
-    	movrst %rdi, RDI
-    	movrst %rsi, RSI
-    	movrst %r8,  R8
-    	movrst %r9,  R9
-    	movrst %r10, R10
-    	movrst %r11, R11
-    	movrst %r12, R12
-    	movrst %r13, R13
-    	movrst %r14, R14
-    	movrst %r15, R15
-    	movrst %rbp, RBP
-    
-    	add $0x88, %rsp
-    	.cfi_def_cfa_offset 0x08
-    
-    	cmpq $0x08, 8(%rsp)
-    	je 1f
-    	swapgs
-    
+        fxrstor (%rsp)
+        mov %rbx, %rsp
+        .cfi_def_cfa_register %rsp
+
+        movrst %rax, RAX
+        movrst %rbx, RBX
+        movrst %rcx, RCX
+        movrst %rdx, RDX
+        movrst %rdi, RDI
+        movrst %rsi, RSI
+        movrst %r8,  R8
+        movrst %r9,  R9
+        movrst %r10, R10
+        movrst %r11, R11
+        movrst %r12, R12
+        movrst %r13, R13
+        movrst %r14, R14
+        movrst %r15, R15
+        movrst %rbp, RBP
+
+        add $0x88, %rsp
+        .cfi_def_cfa_offset 0x08
+
+        cmpq $0x08, 8(%rsp)
+        je 1f
+        swapgs
+
     1:
-    	iretq
-    	.cfi_endproc
-    
+        iretq
+        .cfi_endproc
+
     .altmacro
     .macro build_isr_no_err name
-    	.align 8
-    	.globl ISR\name
-    	.type  ISR\name @function
-    	ISR\name:
-    		.cfi_startproc
-    		.cfi_signal_frame
-    		.cfi_def_cfa_offset 0x08
-    		.cfi_offset %rsp, 0x10
-    
-    		.cfi_same_value %rax
-    		.cfi_same_value %rbx
-    		.cfi_same_value %rcx
-    		.cfi_same_value %rdx
-    		.cfi_same_value %rdi
-    		.cfi_same_value %rsi
-    		.cfi_same_value %r8
-    		.cfi_same_value %r9
-    		.cfi_same_value %r10
-    		.cfi_same_value %r11
-    		.cfi_same_value %r12
-    		.cfi_same_value %r13
-    		.cfi_same_value %r14
-    		.cfi_same_value %r15
-    		.cfi_same_value %rbp
-    
-    		push %rbp # push placeholder for error code
-    		.cfi_def_cfa_offset 0x10
-    
-    		call ISR_stub
-    		.cfi_endproc
+        .align 8
+        .globl ISR\name
+        .type  ISR\name @function
+        ISR\name:
+            .cfi_startproc
+            .cfi_signal_frame
+            .cfi_def_cfa_offset 0x08
+            .cfi_offset %rsp, 0x10
+
+            .cfi_same_value %rax
+            .cfi_same_value %rbx
+            .cfi_same_value %rcx
+            .cfi_same_value %rdx
+            .cfi_same_value %rdi
+            .cfi_same_value %rsi
+            .cfi_same_value %r8
+            .cfi_same_value %r9
+            .cfi_same_value %r10
+            .cfi_same_value %r11
+            .cfi_same_value %r12
+            .cfi_same_value %r13
+            .cfi_same_value %r14
+            .cfi_same_value %r15
+            .cfi_same_value %rbp
+
+            push %rbp # push placeholder for error code
+            .cfi_def_cfa_offset 0x10
+
+            call ISR_stub
+            .cfi_endproc
     .endm
-    
+
     .altmacro
     .macro build_isr_err name
-    	.align 8
-    	.globl ISR\name
-    	.type  ISR\name @function
-    	ISR\name:
-    		.cfi_startproc
-    		.cfi_signal_frame
-    		.cfi_def_cfa_offset 0x10
-    		.cfi_offset %rsp, 0x10
-    
-    		.cfi_same_value %rax
-    		.cfi_same_value %rbx
-    		.cfi_same_value %rcx
-    		.cfi_same_value %rdx
-    		.cfi_same_value %rdi
-    		.cfi_same_value %rsi
-    		.cfi_same_value %r8
-    		.cfi_same_value %r9
-    		.cfi_same_value %r10
-    		.cfi_same_value %r11
-    		.cfi_same_value %r12
-    		.cfi_same_value %r13
-    		.cfi_same_value %r14
-    		.cfi_same_value %r15
-    		.cfi_same_value %rbp
-    
-    		call ISR_stub
-    		.cfi_endproc
+        .align 8
+        .globl ISR\name
+        .type  ISR\name @function
+        ISR\name:
+            .cfi_startproc
+            .cfi_signal_frame
+            .cfi_def_cfa_offset 0x10
+            .cfi_offset %rsp, 0x10
+
+            .cfi_same_value %rax
+            .cfi_same_value %rbx
+            .cfi_same_value %rcx
+            .cfi_same_value %rdx
+            .cfi_same_value %rdi
+            .cfi_same_value %rsi
+            .cfi_same_value %r8
+            .cfi_same_value %r9
+            .cfi_same_value %r10
+            .cfi_same_value %r11
+            .cfi_same_value %r12
+            .cfi_same_value %r13
+            .cfi_same_value %r14
+            .cfi_same_value %r15
+            .cfi_same_value %rbp
+
+            call ISR_stub
+            .cfi_endproc
     .endm
-    
+
     build_isr_no_err 0
     build_isr_no_err 1
     build_isr_no_err 2
@@ -221,20 +221,20 @@ global_asm!(
     build_isr_err    29
     build_isr_err    30
     build_isr_no_err 31
-    
+
     .set i, 32
     .rept 0x80+1
-    	build_isr_no_err %i
-    	.set i, i+1
+        build_isr_no_err %i
+        .set i, i+1
     .endr
-    
+
     .section .rodata
-    
+
     .align 8
     .globl ISR_START_ADDR
     .type  ISR_START_ADDR @object
     ISR_START_ADDR:
-    	.quad ISR0
+        .quad ISR0
     ",
     options(att_syntax),
 );

+ 21 - 1
src/kernel/console.rs

@@ -72,6 +72,7 @@ macro_rules! println_debug {
     };
 }
 
+#[allow(unused_macros)]
 macro_rules! println_info {
     ($($arg:tt)*) => {
         $crate::println!("[kernel: info] {}", format_args!($($arg)*))
@@ -87,6 +88,25 @@ macro_rules! println_fatal {
     };
 }
 
+macro_rules! println_trace {
+    ($feat:literal) => {
+        #[deny(unexpected_cfgs)]
+        {
+            #[cfg(feature = $feat)]
+            $crate::println!("[kernel:trace] ")
+        }
+    };
+    ($feat:literal, $($arg:tt)*) => {
+        #[deny(unexpected_cfgs)]
+        {
+            #[cfg(feature = $feat)]
+            $crate::println!("[kernel:trace] {}", format_args!($($arg)*))
+        }
+    };
+}
+
 use super::terminal::Terminal;
 
-pub(crate) use {print, println, println_debug, println_fatal, println_info, println_warn};
+pub(crate) use {
+    print, println, println_debug, println_fatal, println_info, println_trace, println_warn,
+};

+ 5 - 1
src/kernel/cpu.rs

@@ -2,7 +2,10 @@ use core::{pin::Pin, ptr::NonNull};
 
 use arch::CPUStatus;
 
-use super::mem::{paging::Page, phys::PhysPtr as _};
+use super::{
+    mem::{paging::Page, phys::PhysPtr as _},
+    task::init_rq_thiscpu,
+};
 
 #[arch::define_percpu]
 static CPU_STATUS: Option<CPUStatus> = None;
@@ -29,4 +32,5 @@ pub unsafe fn init_thiscpu() {
 
     // SAFETY: `CPU_STATUS` is a global static and is initialized only once.
     current_cpu().init();
+    init_rq_thiscpu();
 }

+ 19 - 18
src/kernel/syscall.rs

@@ -96,6 +96,7 @@ macro_rules! arg_register {
     };
 }
 
+#[allow(unused_macros)]
 macro_rules! format_expand {
     ($name:ident, $arg:tt) => {
         format_args!("{}: {:x?}", stringify!($name), $arg)
@@ -108,34 +109,34 @@ macro_rules! format_expand {
 macro_rules! syscall32_call {
     ($is:ident, $handler:ident, $($arg:ident: $type:ty),*) => {{
         use $crate::kernel::syscall::{MapArgument, MapArgumentImpl, arg_register};
+        #[allow(unused_imports)]
         use $crate::kernel::syscall::{MapReturnValue, format_expand};
-        use $crate::{kernel::task::Thread, println_info};
+        #[allow(unused_imports)]
+        use $crate::{kernel::task::Thread, println_trace};
 
         $(
             let $arg: $type =
                 MapArgumentImpl::map_arg(arg_register!(${index()}, $is));
         )*
 
-        if cfg!(feature = "debug_syscall") {
-            println_info!(
-                "tid{}: {}({}) => {{",
-                Thread::current().tid,
-                stringify!($handler),
-                format_expand!($($arg, $arg),*),
-            );
-        }
+        println_trace!(
+            "trace_syscall",
+            "tid{}: {}({}) => {{",
+            Thread::current().tid,
+            stringify!($handler),
+            format_expand!($($arg, $arg),*),
+        );
 
         let result = $handler($($arg),*);
 
-        if cfg!(feature = "debug_syscall") {
-            println_info!(
-                "tid{}: {}({}) => }} = {:x?}",
-                Thread::current().tid,
-                stringify!($handler),
-                format_expand!($($arg, $arg),*),
-                result
-            );
-        }
+        println_trace!(
+            "trace_syscall",
+            "tid{}: {}({}) => }} = {:x?}",
+            Thread::current().tid,
+            stringify!($handler),
+            format_expand!($($arg, $arg),*),
+            result
+        );
 
         match result {
             Ok(val) => MapReturnValue::map_ret(val),

+ 1 - 1
src/kernel/syscall/procops.rs

@@ -584,7 +584,7 @@ fn sys_fork(int_stack: &mut InterruptContext, _: &mut ExtendedContext) -> usize
     new_int_stack.rax = 0;
     new_int_stack.eflags = 0x200;
     new_thread.fork_init(new_int_stack);
-    Scheduler::get().lock_irq().uwake(&new_thread);
+    new_thread.uwake();
     new_thread.process.pid as usize
 }
 

+ 2 - 0
src/kernel/task.rs

@@ -2,6 +2,7 @@ mod kstack;
 mod process;
 mod process_group;
 mod process_list;
+mod readyqueue;
 mod scheduler;
 mod session;
 mod signal;
@@ -12,6 +13,7 @@ pub(self) use kstack::KernelStack;
 pub use process::{Process, WaitObject, WaitType};
 pub use process_group::ProcessGroup;
 pub use process_list::{init_multitasking, ProcessList};
+pub use readyqueue::init_rq_thiscpu;
 pub use scheduler::Scheduler;
 pub use session::Session;
 pub use signal::{Signal, SignalAction};

+ 2 - 4
src/kernel/task/process_list.rs

@@ -135,7 +135,7 @@ impl ProcessList {
         // the threads are stopped then proceed.
         for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
             assert!(thread.tid == Thread::current().tid);
-            Scheduler::get().lock().set_zombie(&thread);
+            thread.set_zombie();
             thread.files.close_all();
         }
 
@@ -214,9 +214,7 @@ pub unsafe fn init_multitasking(init_fn: unsafe extern "C" fn()) {
     procs.init = Some(init_process);
     procs.idle = Some(idle_process);
 
-    let mut scheduler = Scheduler::get().lock_irq();
-
     init_thread.init(init_fn as usize);
-    scheduler.uwake(&init_thread);
+    init_thread.uwake();
     Scheduler::set_idle_and_current(idle_thread);
 }

+ 48 - 0
src/kernel/task/readyqueue.rs

@@ -0,0 +1,48 @@
+use alloc::{collections::VecDeque, sync::Arc};
+
+use crate::{println_debug, sync::Spin};
+
+use super::Thread;
+
+#[arch::define_percpu]
+static READYQUEUE: Option<Spin<FifoReadyQueue>> = None;
+
+pub trait ReadyQueue {
+    fn get(&mut self) -> Option<Arc<Thread>>;
+    fn put(&mut self, thread: Arc<Thread>);
+}
+
+pub struct FifoReadyQueue {
+    threads: VecDeque<Arc<Thread>>,
+}
+
+impl FifoReadyQueue {
+    pub const fn new() -> Self {
+        FifoReadyQueue {
+            threads: VecDeque::new(),
+        }
+    }
+}
+
+impl ReadyQueue for FifoReadyQueue {
+    fn get(&mut self) -> Option<Arc<Thread>> {
+        self.threads.pop_front()
+    }
+
+    fn put(&mut self, thread: Arc<Thread>) {
+        self.threads.push_back(thread);
+    }
+}
+
+pub fn rq_thiscpu() -> &'static Spin<dyn ReadyQueue> {
+    // SAFETY: When the ReadyQueue is used on this CPU, it is locked with `lock_irq()`,
+    //         and while it is in use on another CPU, it cannot be touched from this CPU.
+    //         So no issue here.
+    unsafe { READYQUEUE.as_ref() }
+        .as_ref()
+        .expect("ReadyQueue should be initialized")
+}
+
+pub unsafe fn init_rq_thiscpu() {
+    READYQUEUE.set(Some(Spin::new(FifoReadyQueue::new())));
+}

+ 45 - 109
src/kernel/task/scheduler.rs

@@ -3,16 +3,13 @@ use core::{
     sync::atomic::{compiler_fence, fence, Ordering},
 };
 
-use crate::{prelude::*, sync::preempt};
+use crate::{kernel::console::println_trace, prelude::*, sync::preempt};
 
-use alloc::{collections::vec_deque::VecDeque, sync::Arc};
-use lazy_static::lazy_static;
+use alloc::sync::Arc;
 
-use super::{Thread, ThreadState};
+use super::{readyqueue::rq_thiscpu, Thread};
 
-pub struct Scheduler {
-    ready: VecDeque<Arc<Thread>>,
-}
+pub struct Scheduler;
 
 /// Idle task thread
 /// All idle task threads belong to `pid 0` and are pinned to the current CPU.
@@ -23,12 +20,6 @@ static IDLE_TASK: Option<NonNull<Thread>> = None;
 #[arch::define_percpu]
 static CURRENT: Option<NonNull<Thread>> = None;
 
-lazy_static! {
-    static ref GLOBAL_SCHEDULER: Spin<Scheduler> = Spin::new(Scheduler {
-        ready: VecDeque::new(),
-    });
-}
-
 impl Scheduler {
     /// `Scheduler` might be used in various places. Do not hold it for a long time.
     ///
@@ -37,7 +28,8 @@ impl Scheduler {
     /// rescheduling during access to the scheduler. Disabling preemption will do the same.
     ///
     /// Drop the lock before calling `schedule`.
-    pub fn get() -> &'static Spin<Self> {
+    pub fn get() -> &'static Self {
+        static GLOBAL_SCHEDULER: Scheduler = Scheduler;
         &GLOBAL_SCHEDULER
     }
 
@@ -58,11 +50,6 @@ impl Scheduler {
     pub unsafe fn set_idle_and_current(thread: Arc<Thread>) {
         // We don't wake the idle thread, to prevent it from being scheduled accidentally.
         thread.init(idle_task as *const () as usize);
-        assert_eq!(
-            thread.oncpu.swap(true, Ordering::AcqRel),
-            false,
-            "Idle task is already on cpu"
-        );
 
         let old = IDLE_TASK.swap(NonNull::new(Arc::into_raw(thread.clone()) as *mut _));
         assert!(old.is_none(), "Idle task is already set");
@@ -71,89 +58,12 @@ impl Scheduler {
         assert!(old.is_none(), "Current is already set");
     }
 
-    pub fn pop(&mut self) -> Option<Arc<Thread>> {
-        self.ready.pop_front()
-    }
-
-    pub unsafe fn swap_current(&mut self, next: Arc<Thread>) {
-        {
-            let mut next_state = next.state.lock();
-            assert_eq!(*next_state, ThreadState::Ready);
-            *next_state = ThreadState::Running;
-            assert_eq!(next.oncpu.swap(true, Ordering::AcqRel), false);
-        }
-
-        let old: Option<NonNull<Thread>> =
-            CURRENT.swap(NonNull::new(Arc::into_raw(next) as *mut _));
-
-        if let Some(thread_pointer) = old {
-            let thread = Arc::from_raw(thread_pointer.as_ptr());
-            let mut state = thread.state.lock();
-            assert_eq!(thread.oncpu.swap(false, Ordering::AcqRel), true);
-
-            if let ThreadState::Running = *state {
-                *state = ThreadState::Ready;
-                self.enqueue(&thread);
-            }
-        }
-    }
-
-    fn enqueue(&mut self, thread: &Arc<Thread>) {
-        self.ready.push_back(thread.clone());
-    }
-
-    pub fn usleep(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
-        // No need to dequeue. We have proved that the thread is running so not in the queue.
-
-        *state = ThreadState::USleep;
-    }
-
-    pub fn uwake(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::USleep);
-
-        if thread.oncpu.load(Ordering::Acquire) {
-            *state = ThreadState::Running;
-        } else {
-            *state = ThreadState::Ready;
-            self.enqueue(&thread);
+    pub fn activate(&self, thread: &Arc<Thread>) {
+        // TODO: Select an appropriate ready queue to enqueue.
+        if !thread.on_rq.swap(true, Ordering::AcqRel) {
+            rq_thiscpu().lock_irq().put(thread.clone());
         }
     }
-
-    pub fn isleep(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
-        // No need to dequeue. We have proved that the thread is running so not in the queue.
-
-        *state = ThreadState::ISleep;
-    }
-
-    pub fn iwake(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-
-        match *state {
-            ThreadState::Ready | ThreadState::Running | ThreadState::USleep => return,
-            ThreadState::ISleep => {
-                if thread.oncpu.load(Ordering::Acquire) {
-                    *state = ThreadState::Running;
-                } else {
-                    *state = ThreadState::Ready;
-                    self.enqueue(&thread);
-                }
-            }
-            state => panic!("Invalid transition from state {:?} to `Ready`", state),
-        }
-    }
-
-    /// Set `Running` threads to the `Zombie` state.
-    pub fn set_zombie(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
-
-        *state = ThreadState::Zombie;
-    }
 }
 
 impl Scheduler {
@@ -198,14 +108,16 @@ extern "C" fn idle_task() {
     loop {
         debug_assert_eq!(preempt::count(), 1);
 
-        let mut scheduler = Scheduler::get().lock_irq();
-        let state = *Thread::current().state.lock();
-
-        // No other thread to run
-        match scheduler.pop() {
+        let next = rq_thiscpu().lock().get();
+        match next {
             None => {
-                drop(scheduler);
-                if let ThreadState::Running = state {
+                if Thread::current().state.is_runnable() {
+                    println_trace!(
+                        "trace_scheduler",
+                        "Returning to tid({}) without doing context switch",
+                        Thread::current().tid
+                    );
+
                     // The previous thread is still `Running`: return to it directly
                     // without changing its state.
                     context_switch_light(&Scheduler::idle_task(), &Thread::current());
@@ -216,9 +128,33 @@ extern "C" fn idle_task() {
                 continue;
             }
             Some(next) => {
+                println_trace!(
+                    "trace_scheduler",
+                    "Switching from tid({}) to tid({})",
+                    Thread::current().tid,
+                    next.tid
+                );
+
+                debug_assert_ne!(
+                    next.tid,
+                    Thread::current().tid,
+                    "Switching to the same thread"
+                );
+
                 next.process.mm_list.switch_page_table();
-                unsafe { scheduler.swap_current(next) };
-                drop(scheduler);
+
+                if let Some(thread_pointer) =
+                    CURRENT.swap(NonNull::new(Arc::into_raw(next) as *mut _))
+                {
+                    let thread = unsafe { Arc::from_raw(thread_pointer.as_ptr()) };
+                    let mut rq = rq_thiscpu().lock();
+
+                    if thread.state.is_runnable() {
+                        rq.put(thread);
+                    } else {
+                        thread.on_rq.store(false, Ordering::Release);
+                    }
+                }
             }
         }
 

+ 85 - 26
src/kernel/task/thread.rs

@@ -1,7 +1,7 @@
 use core::{
     arch::naked_asm,
     cell::{RefCell, UnsafeCell},
-    sync::atomic::AtomicBool,
+    sync::atomic::{AtomicBool, AtomicU32, Ordering},
 };
 
 use crate::{
@@ -21,14 +21,41 @@ use super::{
 
 use arch::{InterruptContext, TaskContext, UserTLS};
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum ThreadState {
-    Preparing,
-    Running,
-    Ready,
-    Zombie,
-    ISleep,
-    USleep,
+#[derive(Debug)]
+pub struct ThreadState(AtomicU32);
+
+impl ThreadState {
+    pub const RUNNING: u32 = 0;
+    pub const PREPARING: u32 = 1;
+    pub const ZOMBIE: u32 = 2;
+    pub const ISLEEP: u32 = 4;
+    pub const USLEEP: u32 = 8;
+
+    pub const fn new(state: u32) -> Self {
+        Self(AtomicU32::new(state))
+    }
+
+    pub fn store(&self, state: u32) {
+        self.0.store(state, Ordering::Release);
+    }
+
+    pub fn swap(&self, state: u32) -> u32 {
+        self.0.swap(state, Ordering::AcqRel)
+    }
+
+    pub fn cmpxchg(&self, current: u32, new: u32) -> u32 {
+        self.0
+            .compare_exchange(current, new, Ordering::AcqRel, Ordering::Acquire)
+            .unwrap_or_else(|x| x)
+    }
+
+    pub fn assert(&self, state: u32) {
+        assert_eq!(self.0.load(Ordering::Acquire), state);
+    }
+
+    pub fn is_runnable(&self) -> bool {
+        self.0.load(Ordering::Acquire) == Self::RUNNING
+    }
 }
 
 #[derive(Debug)]
@@ -54,9 +81,8 @@ pub struct Thread {
     pub signal_list: SignalList,
 
     /// Thread state for scheduler use.
-    pub state: Spin<ThreadState>,
-
-    pub oncpu: AtomicBool,
+    pub state: ThreadState,
+    pub on_rq: AtomicBool,
 
     /// Thread context
     pub context: UnsafeCell<TaskContext>,
@@ -126,8 +152,8 @@ impl Thread {
             signal_list: SignalList::new(),
             kstack: RefCell::new(KernelStack::new()),
             context: UnsafeCell::new(TaskContext::new()),
-            state: Spin::new(ThreadState::Preparing),
-            oncpu: AtomicBool::new(false),
+            state: ThreadState::new(ThreadState::PREPARING),
+            on_rq: AtomicBool::new(false),
             inner: Spin::new(ThreadInner {
                 name,
                 tls: None,
@@ -142,9 +168,8 @@ impl Thread {
     pub fn new_cloned(&self, procs: &mut ProcessList) -> Arc<Self> {
         let process = Process::new_cloned(&self.process, procs);
 
-        let state = self.state.lock();
         let inner = self.inner.lock();
-        assert!(matches!(*state, ThreadState::Running));
+        self.state.assert(ThreadState::RUNNING);
 
         let signal_list = self.signal_list.clone();
         signal_list.clear_pending();
@@ -157,8 +182,8 @@ impl Thread {
             signal_list,
             kstack: RefCell::new(KernelStack::new()),
             context: UnsafeCell::new(TaskContext::new()),
-            state: Spin::new(ThreadState::Preparing),
-            oncpu: AtomicBool::new(false),
+            state: ThreadState::new(ThreadState::PREPARING),
+            on_rq: AtomicBool::new(false),
             inner: Spin::new(ThreadInner {
                 name: inner.name.clone(),
                 tls: inner.tls.clone(),
@@ -190,7 +215,7 @@ impl Thread {
 
         // `SIGSTOP` can only be waken up by `SIGCONT` or `SIGKILL`.
         // SAFETY: Preempt disabled above.
-        Scheduler::get().lock().usleep(self);
+        self.usleep();
         Scheduler::schedule();
     }
 
@@ -206,14 +231,14 @@ impl Thread {
         }
     }
 
-    pub fn raise(self: &Arc<Thread>, signal: Signal) -> RaiseResult {
+    pub fn raise(self: &Arc<Self>, signal: Signal) -> RaiseResult {
         match self.signal_list.raise(signal) {
             RaiseResult::ShouldIWakeUp => {
-                Scheduler::get().lock_irq().iwake(self);
+                self.iwake();
                 RaiseResult::Finished
             }
             RaiseResult::ShouldUWakeUp => {
-                Scheduler::get().lock_irq().uwake(self);
+                self.uwake();
                 RaiseResult::Finished
             }
             result => result,
@@ -256,8 +281,7 @@ impl Thread {
     }
 
     pub fn fork_init(&self, interrupt_context: InterruptContext) {
-        let mut state = self.state.lock();
-        *state = ThreadState::USleep;
+        self.state.store(ThreadState::USLEEP);
 
         let sp = self.kstack.borrow().init(interrupt_context);
         unsafe {
@@ -269,8 +293,7 @@ impl Thread {
     }
 
     pub fn init(&self, entry: usize) {
-        let mut state = self.state.lock();
-        *state = ThreadState::USleep;
+        self.state.store(ThreadState::USLEEP);
         unsafe {
             self.get_context_mut_ptr()
                 .as_mut()
@@ -301,6 +324,42 @@ impl Thread {
     pub fn get_name(&self) -> Arc<[u8]> {
         self.inner.lock().name.clone()
     }
+
+    pub fn usleep(&self) {
+        // No need to dequeue: the thread is running (asserted below), so it is not in the queue.
+        let prev_state = self.state.swap(ThreadState::USLEEP);
+        assert_eq!(prev_state, ThreadState::RUNNING);
+    }
+
+    pub fn uwake(self: &Arc<Self>) {
+        let prev_state = self.state.swap(ThreadState::RUNNING);
+        assert_eq!(prev_state, ThreadState::USLEEP);
+
+        Scheduler::get().activate(self);
+    }
+
+    pub fn isleep(self: &Arc<Self>) {
+        // No need to dequeue: the thread is running (asserted below), so it is not in the queue.
+        let prev_state = self.state.swap(ThreadState::ISLEEP);
+        assert_eq!(prev_state, ThreadState::RUNNING);
+    }
+
+    pub fn iwake(self: &Arc<Self>) {
+        match self
+            .state
+            .cmpxchg(ThreadState::ISLEEP, ThreadState::RUNNING)
+        {
+            ThreadState::RUNNING | ThreadState::USLEEP => return,
+            ThreadState::ISLEEP => Scheduler::get().activate(self),
+            state => panic!("Invalid transition from state {:?} to `Running`", state),
+        }
+    }
+
+    /// Set `Running` threads to the `Zombie` state.
+    pub fn set_zombie(self: &Arc<Self>) {
+        let prev_state = self.state.swap(ThreadState::ZOMBIE);
+        assert_eq!(prev_state, ThreadState::RUNNING);
+    }
 }
 
 #[naked]

+ 1 - 1
src/prelude.rs

@@ -19,7 +19,7 @@ pub use crate::bindings::root as bindings;
 
 #[allow(unused_imports)]
 pub(crate) use crate::kernel::console::{
-    print, println, println_debug, println_fatal, println_info, println_warn,
+    print, println, println_debug, println_fatal, println_info, println_trace, println_warn,
 };
 
 #[allow(unused_imports)]

+ 22 - 22
src/sync/condvar.rs

@@ -1,5 +1,8 @@
 use crate::{
-    kernel::task::{Scheduler, Thread},
+    kernel::{
+        console::println_trace,
+        task::{Scheduler, Thread, ThreadState},
+    },
     prelude::*,
     sync::preempt,
 };
@@ -28,33 +31,36 @@ impl<const I: bool> CondVar<I> {
         }
     }
 
-    fn wake(schedule: &mut Scheduler, thread: &Arc<Thread>) {
+    fn wake(thread: &Arc<Thread>) {
+        println_trace!("trace_condvar", "tid({}) is trying to wake", thread.tid);
         if I {
-            schedule.iwake(thread);
+            thread.iwake();
         } else {
-            schedule.uwake(thread);
+            thread.uwake();
         }
+        println_trace!("trace_condvar", "tid({}) is awake", thread.tid);
     }
 
-    fn sleep(scheduler: &mut Scheduler) {
+    fn sleep() {
+        let thread = Thread::current();
+        println_trace!("trace_condvar", "tid({}) is trying to sleep", thread.tid);
         if I {
-            scheduler.isleep(&Thread::current());
+            thread.isleep();
         } else {
-            scheduler.usleep(&Thread::current());
+            thread.usleep();
         }
+        println_trace!("trace_condvar", "tid({}) is sleeping", thread.tid);
     }
 
     pub fn notify_one(&self) {
-        let mut scheduler = Scheduler::get().lock_irq();
         if let Some(waiter) = self.waiters.lock().pop_front() {
-            Self::wake(scheduler.as_mut(), &waiter);
+            Self::wake(&waiter);
         }
     }
 
     pub fn notify_all(&self) {
-        let mut scheduler = Scheduler::get().lock_irq();
         self.waiters.lock().retain(|waiter| {
-            Self::wake(scheduler.as_mut(), &waiter);
+            Self::wake(&waiter);
             false
         });
     }
@@ -63,18 +69,10 @@ impl<const I: bool> CondVar<I> {
     ///
     /// # Might Sleep
     /// This function **might sleep**, so call it in a preemptible context.
-    ///
-    /// # Return
-    /// - `true`: a pending signal was received
     pub fn wait<'a, T, S: LockStrategy, const W: bool>(&self, guard: &mut Guard<'a, T, S, W>) {
         preempt::disable();
-        {
-            let mut scheduler = Scheduler::get().lock_irq();
-            // We have scheduler locked and IRQ disabled. So no one could be waking us up for now.
-
-            self.waiters.lock().push_back(Thread::current().clone());
-            Self::sleep(scheduler.as_mut());
-        }
+        self.waiters.lock().push_back(Thread::current().clone());
+        Self::sleep();
 
         // TODO!!!: Another way to do this:
         //
@@ -86,8 +84,10 @@ impl<const I: bool> CondVar<I> {
         Scheduler::schedule();
         unsafe { guard.force_relock() };
 
+        Thread::current().state.assert(ThreadState::RUNNING);
+
         self.waiters
-            .lock_irq()
+            .lock()
             .retain(|waiter| waiter.tid != Thread::current().tid);
     }
 }