
6 Commits e037208da1 ... a7bcb9ce15

Author SHA1 Message Date
  greatbridf a7bcb9ce15 Merge branch 'buddy_refactor' 1 week ago
  greatbridf 29bc525d10 Merge branch 'process-rewrite' 1 week ago
  greatbridf 8f28dfb772 feat: impl Write for dyn Buffer 1 week ago
  shao 5b3f887a65 refactor: refactor page alloc in rust 2 weeks ago
  greatbridf 12dacd3fad fix(c++build): disable mmx and sse 2 weeks ago
  greatbridf 66958893e5 rewrite: move Process, ProcessGroup, Session out of thread.rs 2 weeks ago

+ 0 - 1
.rustfmt.toml

@@ -67,7 +67,6 @@ use_field_init_shorthand = false
 force_explicit_abi = true
 condense_wildcard_suffixes = false
 color = "Auto"
-required_version = "1.7.1"
 unstable_features = false
 disable_all_formatting = false
 skip_children = false

+ 1 - 2
CMakeLists.txt

@@ -6,7 +6,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_CXX_LINK_EXECUTABLE
     "<CMAKE_LINKER> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
 
-set(C_CXX_FLAGS "-nostdinc -nostdlib -W -Wall -Wextra -Wno-stringop-overflow -Wno-builtin-declaration-mismatch -Wno-format -fverbose-asm -fno-exceptions -ffreestanding -fno-pic -mno-red-zone -mstack-protector-guard=global -mcmodel=kernel")
+set(C_CXX_FLAGS "-nostdinc -nostdlib -mno-sse -mno-mmx -W -Wall -Wextra -Wno-stringop-overflow -Wno-builtin-declaration-mismatch -Wno-format -fverbose-asm -fno-exceptions -ffreestanding -fno-pic -mno-red-zone -mstack-protector-guard=global -mcmodel=kernel")
 set(CMAKE_C_FLAGS "${C_CXX_FLAGS} -Werror=implicit-int -Werror=implicit-function-declaration -Werror=strict-aliasing")
 set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -fno-use-cxa-atexit -fno-rtti")
 set(CMAKE_CXX_LINK_FLAGS "-z noexecstack --gc-sections")
@@ -42,7 +42,6 @@ set(KERNEL_MAIN_SOURCES src/kinit.cpp
                         src/kernel/async/lock.cc
                         src/kernel/allocator.cc
                         src/kernel/process.cpp
-                        src/kernel/mem/paging.cc
                         src/kernel/mem/slab.cc
                         src/kernel/vga.cpp
                         src/kernel/hw/acpi.cc

+ 7 - 25
include/kernel/mem/paging.hpp

@@ -96,29 +96,6 @@ struct page {
 
 inline page* PAGE_ARRAY;
 
-void create_zone(uintptr_t start, uintptr_t end);
-void mark_present(uintptr_t start, uintptr_t end);
-
-[[nodiscard]] page* alloc_page();
-// order represents power of 2
-[[nodiscard]] page* alloc_pages(unsigned order);
-
-// order represents power of 2
-void free_pages(page* page, unsigned order);
-void free_page(page* page);
-
-// order represents power of 2
-void free_pages(pfn_t pfn, unsigned order);
-void free_page(pfn_t pfn);
-
-// clear the page all zero
-[[nodiscard]] pfn_t alloc_page_table();
-
-pfn_t page_to_pfn(page* page);
-page* pfn_to_page(pfn_t pfn);
-
-void increase_refcount(page* page);
-
 constexpr unsigned long PAGE_FAULT_P = 0x00000001;
 constexpr unsigned long PAGE_FAULT_W = 0x00000002;
 constexpr unsigned long PAGE_FAULT_U = 0x00000004;
@@ -128,6 +105,11 @@ constexpr unsigned long PAGE_FAULT_PK = 0x00000020;
 constexpr unsigned long PAGE_FAULT_SS = 0x00000040;
 constexpr unsigned long PAGE_FAULT_SGX = 0x00008000;
 
-void handle_page_fault(interrupt_stack* int_stack);
-
 } // namespace kernel::mem::paging
+
+struct Page;
+
+extern "C" Page* c_alloc_page();
+extern "C" Page* c_alloc_pages(uint32_t order);
+extern "C" uintptr_t c_alloc_page_table();
+extern "C" uintptr_t page_to_pfn(Page* page);

+ 6 - 6
src/fs/procfs.rs

@@ -20,7 +20,7 @@ use crate::{
         },
     },
     prelude::*,
-    sync::Locked,
+    sync::{AsRefMutPosition as _, AsRefPosition as _, Locked},
 };
 
 fn split_len_offset(data: &[u8], len: usize, offset: usize) -> Option<&[u8]> {
@@ -135,7 +135,7 @@ impl Inode for DirInode {
         let lock = self.rwsem.lock_shared();
         Ok(self
             .entries
-            .access(lock.as_ref())
+            .access(lock.as_pos())
             .iter()
             .find_map(|(name, node)| {
                 name.as_ref()
@@ -151,7 +151,7 @@ impl Inode for DirInode {
     ) -> KResult<usize> {
         let lock = self.rwsem.lock_shared();
         self.entries
-            .access(lock.as_ref())
+            .access(lock.as_pos())
             .iter()
             .skip(offset)
             .map(|(name, node)| callback(name.as_ref(), node.ino()))
@@ -236,10 +236,10 @@ pub fn creat(
     let inode = FileInode::new(ino, Arc::downgrade(&fs), file);
 
     {
-        let mut lock = parent.idata.rwsem.lock();
+        let lock = parent.idata.rwsem.lock();
         parent
             .entries
-            .access_mut(lock.as_mut())
+            .access_mut(lock.as_pos_mut())
             .push((name, ProcFsNode::File(inode.clone())));
     }
 
@@ -259,7 +259,7 @@ pub fn mkdir(parent: &ProcFsNode, name: &[u8]) -> KResult<ProcFsNode> {
 
     parent
         .entries
-        .access_mut(inode.rwsem.lock().as_mut())
+        .access_mut(inode.rwsem.lock().as_pos_mut())
         .push((Arc::from(name), ProcFsNode::Dir(inode.clone())));
 
     Ok(ProcFsNode::Dir(inode))
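
Note on the `as_pos()` / `as_pos_mut()` calls above: the procfs (and tmpfs) call sites now derive a position token from the held rwsem guard instead of passing `lock.as_ref()` / `lock.as_mut()`, which is also why the guards no longer need `mut` bindings. The project's actual `sync` module is not part of this diff; the following is only a minimal sketch of what such a token-gated `Locked` wrapper could look like, with all names assumed.

```rust
// Hypothetical sketch only; the real `Locked`, `RefPosition` and
// `RefMutPosition` types live in the project's `sync` module.
use core::{cell::UnsafeCell, marker::PhantomData};

pub struct RefPosition<'a, L>(PhantomData<&'a L>); // proof: shared lock held
pub struct RefMutPosition<'a, L>(PhantomData<&'a L>); // proof: exclusive lock held

pub struct Locked<T, L> {
    value: UnsafeCell<T>,
    _guarded_by: PhantomData<L>,
}

impl<T, L> Locked<T, L> {
    pub fn access<'a>(&'a self, _proof: RefPosition<'a, L>) -> &'a T {
        // Sound only because the token witnesses a held (shared) lock.
        unsafe { &*self.value.get() }
    }

    pub fn access_mut<'a>(&'a self, _proof: RefMutPosition<'a, L>) -> &'a mut T {
        // The exclusivity comes from the token, not from `&mut self`,
        // so callers can keep an immutable guard binding.
        unsafe { &mut *self.value.get() }
    }
}
```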

+ 19 - 20
src/fs/tmpfs.rs

@@ -14,7 +14,7 @@ use crate::{
         DevId,
     },
     prelude::*,
-    sync::Locked,
+    sync::{AsRefMutPosition as _, AsRefPosition as _, Locked, RefMutPosition},
 };
 
 fn acquire(vfs: &Weak<dyn Vfs>) -> KResult<Arc<dyn Vfs>> {
@@ -68,7 +68,7 @@ impl DirectoryInode {
         })
     }
 
-    fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: &mut ()) {
+    fn link(&self, name: Arc<[u8]>, file: &dyn Inode, dlock: RefMutPosition<'_, ()>) {
         // SAFETY: Only `unlink` will do something based on `nlink` count
         //         No need to synchronize here
         file.nlink.fetch_add(1, Ordering::Relaxed);
@@ -88,7 +88,7 @@ impl Inode for DirectoryInode {
     ) -> KResult<usize> {
         let lock = self.rwsem.lock_shared();
         self.entries
-            .access(lock.as_ref())
+            .access(lock.as_pos())
             .iter()
             .skip(offset)
             .map(|(name, ino)| callback(&name, *ino))
@@ -101,12 +101,12 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let mut rwsem = self.rwsem.lock();
+        let rwsem = self.rwsem.lock();
 
         let ino = vfs.assign_ino();
         let file = FileInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.as_mut());
+        self.link(at.name().clone(), file.as_ref(), rwsem.as_pos_mut());
         at.save_reg(file)
     }
 
@@ -118,7 +118,7 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let mut rwsem = self.rwsem.lock();
+        let rwsem = self.rwsem.lock();
 
         let ino = vfs.assign_ino();
         let file = NodeInode::new(
@@ -128,7 +128,7 @@ impl Inode for DirectoryInode {
             dev,
         );
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.as_mut());
+        self.link(at.name().clone(), file.as_ref(), rwsem.as_pos_mut());
         at.save_reg(file)
     }
 
@@ -136,12 +136,12 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let mut rwsem = self.rwsem.lock();
+        let rwsem = self.rwsem.lock();
 
         let ino = vfs.assign_ino();
         let file = SymlinkInode::new(ino, self.vfs.clone(), target.into());
 
-        self.link(at.name().clone(), file.as_ref(), rwsem.as_mut());
+        self.link(at.name().clone(), file.as_ref(), rwsem.as_pos_mut());
         at.save_symlink(file)
     }
 
@@ -149,20 +149,19 @@ impl Inode for DirectoryInode {
         let vfs = acquire(&self.vfs)?;
         let vfs = astmp(&vfs);
 
-        let mut rwsem = self.rwsem.lock();
+        let rwsem = self.rwsem.lock();
 
         let ino = vfs.assign_ino();
         let newdir = DirectoryInode::new(ino, self.vfs.clone(), mode);
 
-        self.link(at.name().clone(), newdir.as_ref(), rwsem.as_mut());
+        self.link(at.name().clone(), newdir.as_ref(), rwsem.as_pos_mut());
         at.save_dir(newdir)
     }
 
     fn unlink(&self, at: &Arc<Dentry>) -> KResult<()> {
-        let vfs = acquire(&self.vfs)?;
-        let vfs = astmp(&vfs);
+        let _vfs = acquire(&self.vfs)?;
 
-        let mut dlock = self.rwsem.lock();
+        let dlock = self.rwsem.lock();
 
         let file = at.get_inode()?;
         let _flock = file.rwsem.lock();
@@ -172,7 +171,7 @@ impl Inode for DirectoryInode {
             return Err(EISDIR);
         }
 
-        let entries = self.entries.access_mut(dlock.as_mut());
+        let entries = self.entries.access_mut(dlock.as_pos_mut());
         entries.retain(|(_, ino)| *ino != file.ino);
 
         assert_eq!(
@@ -253,7 +252,7 @@ impl Inode for FileInode {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
         let lock = self.rwsem.lock_shared();
 
-        match self.filedata.access(lock.as_ref()).split_at_checked(offset) {
+        match self.filedata.access(lock.as_pos()).split_at_checked(offset) {
             Some((_, data)) => buffer.fill(data).map(|result| result.allow_partial()),
             None => Ok(0),
         }
@@ -261,8 +260,8 @@ impl Inode for FileInode {
 
     fn write(&self, buffer: &[u8], offset: WriteOffset) -> KResult<usize> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let mut lock = self.rwsem.lock();
-        let filedata = self.filedata.access_mut(lock.as_mut());
+        let lock = self.rwsem.lock();
+        let filedata = self.filedata.access_mut(lock.as_pos_mut());
 
         let offset = match offset {
             WriteOffset::Position(offset) => offset,
@@ -289,8 +288,8 @@ impl Inode for FileInode {
 
     fn truncate(&self, length: usize) -> KResult<()> {
         // TODO: We don't need that strong guarantee, find some way to avoid locks
-        let mut lock = self.rwsem.lock();
-        let filedata = self.filedata.access_mut(lock.as_mut());
+        let lock = self.rwsem.lock();
+        let filedata = self.filedata.access_mut(lock.as_pos_mut());
 
         // SAFETY: `lock` has done the synchronization
         self.size.store(length as u64, Ordering::Relaxed);

+ 60 - 0
src/intrusive_list.rs

@@ -0,0 +1,60 @@
+use core::ptr::NonNull;
+
+pub struct Link {
+    prev: Option<NonNull<Link>>,
+    next: Option<NonNull<Link>>,
+}
+
+impl Link {
+    pub const fn new() -> Self {
+        Self {
+            prev: None,
+            next: None,
+        }
+    }
+
+    pub fn insert(&mut self, node: &mut Self) {
+        unsafe {
+            let insert_node = NonNull::new(node as *mut Self);
+            if let Some(next) = self.next {
+                (*next.as_ptr()).prev = insert_node;
+            }
+            node.next = self.next;
+            node.prev = NonNull::new(self as *mut Self);
+            self.next = insert_node;
+        }
+    }
+
+    pub fn remove(&mut self) {
+        if let Some(next) = self.next {
+            unsafe { (*next.as_ptr()).prev = self.prev };
+        }
+
+        if let Some(prev) = self.prev {
+            unsafe { (*prev.as_ptr()).next = self.next };
+        }
+
+        self.prev = None;
+        self.next = None;
+    }
+
+    pub fn next(&self) -> Option<&Self> {
+        self.next.map(|node| unsafe { &*node.as_ptr() })
+    }
+
+    pub fn next_mut(&mut self) -> Option<&mut Self> {
+        self.next.map(|node| unsafe { &mut *node.as_ptr() })
+    }
+}
+
+#[macro_export]
+macro_rules! container_of {
+    ($ptr:expr, $type:ty, $($f:tt)*) => {{
+        let ptr = $ptr as *const _ as *const u8;
+        let offset: usize = ::core::mem::offset_of!($type, $($f)*);
+        ptr.sub(offset) as *mut $type
+    }}
+}
+
+#[allow(unused_imports)]
+pub use container_of;
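
The new intrusive list is the classic embedded-node pattern: a `Link` lives inside the payload struct and `container_of!` recovers the payload from a `&Link` by subtracting the field offset, which is exactly how `FreeArea::alloc_pages` in page_alloc.rs below retrieves a `Page` from its free-list link. A minimal usage sketch; the `Frame` type here is made up for illustration:

```rust
use crate::container_of;
use crate::intrusive_list::Link;

// Illustrative payload type; in this series the real payload is `Page`.
struct Frame {
    order: u32,
    link: Link, // the list node is embedded in the payload itself
}

// Pop the first element following `head`, returning a pointer to its container.
fn pop_first(head: &mut Link) -> Option<*mut Frame> {
    let node = head.next_mut()?;
    // Recover the containing `Frame` from its embedded `Link` field.
    let frame = unsafe { container_of!(node, Frame, link) };
    unsafe { (*frame).link.remove() };
    Some(frame)
}
```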

+ 9 - 0
src/io.rs

@@ -249,3 +249,12 @@ impl Write for RawBuffer<'_> {
         }
     }
 }
+
+impl Write for dyn Buffer + '_ {
+    fn write_str(&mut self, s: &str) -> core::fmt::Result {
+        match self.fill(s.as_bytes()) {
+            Ok(FillResult::Done(_)) => Ok(()),
+            _ => Err(core::fmt::Error),
+        }
+    }
+}
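
Implementing `core::fmt::Write` for `dyn Buffer` lets formatted output be written straight into any buffer through the `write!` machinery, with anything short of a complete fill reported as a formatting error. A small usage sketch (the buffer value itself is assumed to come from elsewhere):

```rust
use core::fmt::Write as _;

use crate::io::Buffer;

// Any concrete buffer works once viewed through the trait object.
fn write_status(buffer: &mut dyn Buffer) -> core::fmt::Result {
    // `write!` drives `write_str`, which fills the buffer and maps
    // anything other than a complete `FillResult::Done` to `fmt::Error`.
    write!(buffer, "pid={} comm={}", 1, "init")
}
```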

+ 2 - 2
src/kernel/allocator.cc

@@ -117,11 +117,11 @@ std::byte* brk_memory_allocator::brk(byte* addr) {
 
         auto pdpte = pdpt[std::get<2>(idx)];
         if (!pdpte.pfn())
-            pdpte.set(PA_KERNEL_PAGE_TABLE, alloc_page_table());
+            pdpte.set(PA_KERNEL_PAGE_TABLE, c_alloc_page_table());
 
         auto pde = pdpte.parse()[std::get<3>(idx)];
         assert(!(pde.attributes() & PA_P));
-        pde.set(PA_KERNEL_DATA_HUGE, page_to_pfn(alloc_pages(9)));
+        pde.set(PA_KERNEL_DATA_HUGE, page_to_pfn(c_alloc_pages(9)) << 12);
 
         current_allocated += 0x200000;
     }
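
The added `<< 12` compensates for a change of units: the removed C++ `page_to_pfn` (see the deleted paging.cc further down) returned `(page - PAGE_ARRAY) * 0x1000`, effectively a physical address, whereas the new Rust `page_to_pfn` returns the bare frame index, so C++ callers that need a physical address now shift by the page-size bits themselves. A tiny sketch of the conversion, assuming 4 KiB pages:

```rust
const PAGE_SIZE_BITS: usize = 12;

// New convention: `page_to_pfn` yields a frame index, not an address.
fn pfn_to_physical(pfn: usize) -> usize {
    pfn << PAGE_SIZE_BITS // e.g. frame 0x2f -> physical address 0x2f000
}
```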

+ 7 - 5
src/kernel/chardev.rs

@@ -5,11 +5,11 @@ use alloc::{
 };
 use bindings::{EEXIST, EIO};
 
-use crate::{io::Buffer, kernel::console::CONSOLE, prelude::*};
+use crate::{io::Buffer, kernel::console::CONSOLE, prelude::*, sync::AsRefPosition as _};
 
 use super::{
     block::make_device,
-    task::Thread,
+    task::{ProcessList, Thread},
     terminal::Terminal,
     vfs::{
         file::{File, TerminalFile},
@@ -76,11 +76,13 @@ impl CharDevice {
     pub fn open(self: &Arc<Self>) -> KResult<Arc<File>> {
         Ok(match &self.device {
             CharDeviceType::Terminal(terminal) => {
+                let procs = ProcessList::get().lock_shared();
+                let current = Thread::current();
+                let session = current.process.session(procs.as_pos());
                 // We only set the control terminal if the process is the session leader.
-                if Thread::current().process.sid() == Thread::current().process.pid {
-                    let session = Thread::current().process.session();
+                if session.sid == Thread::current().process.pid {
                     // Silently fail if we can't set the control terminal.
-                    dont_check!(session.set_control_terminal(&terminal, false));
+                    dont_check!(session.set_control_terminal(&terminal, false, procs.as_pos()));
                 }
 
                 TerminalFile::new(terminal.clone())

+ 4 - 2
src/kernel/cpu.rs

@@ -15,7 +15,7 @@ pub unsafe fn current_cpu() -> Pin<&'static mut CPUStatus> {
 }
 
 pub unsafe fn init_thiscpu() {
-    CPU_STATUS.set(Some(arch::CPUStatus::new_thiscpu(|layout| {
+    let status = arch::CPUStatus::new_thiscpu(|layout| {
         // TODO: Use page size defined in `arch`.
         let page_count = (layout.size() + 0x1000 - 1) / 0x1000;
         let page = Page::alloc_ceil(page_count);
@@ -23,7 +23,9 @@ pub unsafe fn init_thiscpu() {
         core::mem::forget(page);
 
         NonNull::new(pointer).expect("Allocated page pfn should be non-null")
-    })));
+    });
+
+    CPU_STATUS.set(Some(status));
 
     // SAFETY: `CPU_STATUS` is global static and initialized only once.
     current_cpu().init();

+ 5 - 2
src/kernel/mem.rs

@@ -1,12 +1,15 @@
 pub mod paging;
 pub mod phys;
 
+mod address;
 mod mm_area;
 mod mm_list;
+mod page_alloc;
 mod page_table;
-mod vrange;
 
+pub use address::{PAddr, VAddr, VRange, PFN, VPN};
 pub(self) use mm_area::MMArea;
 pub use mm_list::{handle_page_fault, FileMapping, MMList, Mapping, PageFaultError, Permission};
+pub(self) use page_alloc::{alloc_page, alloc_pages, create_pages, free_pages, mark_present};
 pub(self) use page_table::{PageTable, PTE};
-pub use vrange::{VAddr, VRange};
+pub use paging::{Page, PageBuffer};

+ 392 - 0
src/kernel/mem/address.rs

@@ -0,0 +1,392 @@
+use core::{
+    cmp::Ordering,
+    fmt::{self, Debug, Formatter},
+    ops::{Add, Sub, RangeBounds},
+};
+
+#[repr(C)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct PAddr(pub usize);
+
+#[repr(C)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct VAddr(pub usize);
+
+#[repr(C)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct PFN(pub usize);
+
+#[repr(C)]
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub struct VPN(pub usize);
+
+const PAGE_SIZE: usize = 4096;
+const PAGE_SIZE_BITS: usize = 12;
+const USER_SPACE_MEMORY_TOP: VAddr = VAddr(0x8000_0000_0000);
+
+impl From<PAddr> for usize {
+    fn from(v: PAddr) -> Self {
+        v.0
+    }
+}
+
+impl From<PFN> for usize {
+    fn from(v: PFN) -> Self {
+        v.0
+    }
+}
+
+impl From<VAddr> for usize {
+    fn from(v: VAddr) -> Self {
+        v.0
+    }
+}
+
+impl From<VPN> for usize {
+    fn from(v: VPN) -> Self {
+        v.0
+    }
+}
+
+impl From<usize> for PAddr {
+    fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl From<usize> for PFN {
+    fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl From<usize> for VAddr {
+    fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl From<usize> for VPN {
+    fn from(v: usize) -> Self {
+        Self(v)
+    }
+}
+
+impl From<VPN> for VAddr {
+    fn from(v: VPN) -> Self {
+        Self(v.0 << PAGE_SIZE_BITS)
+    }
+}
+
+impl From<VAddr> for VPN {
+    fn from(v: VAddr) -> Self {
+        assert_eq!(v.page_offset(), 0);
+        v.floor_vpn()
+    }
+}
+
+impl From<PAddr> for PFN {
+    fn from(v: PAddr) -> Self {
+        assert_eq!(v.page_offset(), 0);
+        v.floor_pfn()
+    }
+}
+
+impl From<PFN> for PAddr {
+    fn from(v: PFN) -> Self {
+        Self(v.0 << PAGE_SIZE_BITS)
+    }
+}
+
+impl PAddr {
+    pub fn floor_pfn(&self) -> PFN {
+        PFN(self.0 / PAGE_SIZE)
+    }
+
+    pub fn ceil_pfn(&self) -> PFN {
+        PFN((self.0 + PAGE_SIZE - 1) / PAGE_SIZE)
+    }
+
+    pub fn page_offset(&self) -> usize {
+        self.0 & (PAGE_SIZE - 1)
+    }
+
+    pub fn is_aligned(&self) -> bool {
+        self.page_offset() == 0
+    }
+}
+
+impl PFN {
+    pub fn buddy_pfn(&self, order: u32) -> PFN {
+        PFN::from(self.0 ^ (1 << order))
+    }
+
+    pub fn combined_pfn(&self, buddy_pfn: PFN) -> PFN {
+        PFN::from(self.0 & buddy_pfn.0)
+    }
+}
+
+impl VAddr {
+    pub const NULL: Self = Self(0);
+
+    pub fn floor_vpn(&self) -> VPN {
+        VPN(self.0 / PAGE_SIZE)
+    }
+
+    pub fn ceil_vpn(&self) -> VPN {
+        VPN((self.0 - 1 + PAGE_SIZE) / PAGE_SIZE)
+    }
+
+    pub fn page_offset(&self) -> usize {
+        self.0 & (PAGE_SIZE - 1)
+    }
+
+    pub fn is_aligned(&self) -> bool {
+        self.page_offset() == 0
+    }
+
+    pub fn is_user(&self) -> bool {
+        self.0 != 0 && self < &USER_SPACE_MEMORY_TOP
+    }
+
+    pub fn floor(&self) -> Self {
+        VAddr(self.0 & !(PAGE_SIZE - 1))
+    }
+
+    pub fn ceil(&self) -> Self {
+        VAddr((self.0 + (PAGE_SIZE - 1)) & !(PAGE_SIZE - 1))
+    }
+}
+
+impl Sub for VAddr {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for VAddr {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        VAddr(self.0 - rhs)
+    }
+}
+
+impl Add<usize> for VAddr {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        VAddr(self.0 + rhs)
+    }
+}
+
+impl Sub for PAddr {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for PAddr {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        PAddr(self.0 - rhs)
+    }
+}
+
+impl Add<usize> for PAddr {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        PAddr(self.0 + rhs)
+    }
+}
+
+impl Debug for VAddr {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "VAddr{:#x}", self.0)
+    }
+}
+
+impl Debug for PAddr {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "PAddr{:#x}", self.0)
+    }
+}
+
+impl Add<usize> for PFN {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        PFN(self.0 + rhs)
+    }
+}
+
+impl Sub for PFN {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for PFN {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        PFN(self.0 - rhs)
+    }
+}
+
+impl Add<usize> for VPN {
+    type Output = Self;
+
+    fn add(self, rhs: usize) -> Self::Output {
+        VPN(self.0 + rhs)
+    }
+}
+
+impl Sub for VPN {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        self.0 - rhs.0
+    }
+}
+
+impl Sub<usize> for VPN {
+    type Output = Self;
+
+    fn sub(self, rhs: usize) -> Self::Output {
+        VPN(self.0 - rhs)
+    }
+}
+
+impl Debug for VPN {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "VPN{:#x}", self.0)
+    }
+}
+
+impl Debug for PFN {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        write!(f, "PFN{:#x}", self.0)
+    }
+}
+
+#[derive(Clone, Copy)]
+pub struct VRange {
+    start: VAddr,
+    end: VAddr,
+}
+
+impl Debug for VRange {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        write!(f, "[{:?}, {:?})", self.start, self.end)
+    }
+}
+
+impl Eq for VRange {}
+impl PartialOrd for VRange {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl PartialEq for VRange {
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+/// Any two ranges that have one of them containing the other are considered equal.
+impl Ord for VRange {
+    fn cmp(&self, other: &Self) -> Ordering {
+        if self.start == other.start {
+            return Ordering::Equal;
+        }
+
+        if self.end == other.end {
+            if self.start == self.end {
+                return Ordering::Greater;
+            }
+            if other.start == other.end {
+                return Ordering::Less;
+            }
+            return Ordering::Equal;
+        }
+
+        if self.start < other.start {
+            if other.end < self.end {
+                return Ordering::Equal;
+            } else {
+                return Ordering::Less;
+            }
+        }
+
+        if other.start < self.start {
+            if self.end < other.end {
+                return Ordering::Equal;
+            } else {
+                return Ordering::Greater;
+            }
+        }
+
+        unreachable!()
+    }
+}
+
+impl From<VAddr> for VRange {
+    fn from(addr: VAddr) -> Self {
+        VRange::new(addr, addr)
+    }
+}
+
+impl VRange {
+    pub fn new(start: VAddr, end: VAddr) -> Self {
+        assert!(start <= end);
+        VRange { start, end }
+    }
+
+    pub fn is_overlapped(&self, other: &Self) -> bool {
+        self == other
+    }
+
+    pub fn is_user(&self) -> bool {
+        self.start < USER_SPACE_MEMORY_TOP && self.end <= USER_SPACE_MEMORY_TOP
+    }
+
+    pub fn start(&self) -> VAddr {
+        self.start
+    }
+
+    pub fn end(&self) -> VAddr {
+        self.end
+    }
+
+    pub fn len(&self) -> usize {
+        self.end.0 - self.start.0
+    }
+
+    pub fn shrink(&self, count: usize) -> Self {
+        assert!(count <= self.len());
+        VRange::new(self.start, self.end - count)
+    }
+
+    pub fn grow(&self, count: usize) -> Self {
+        VRange::new(self.start, self.end + count)
+    }
+
+    pub fn into_range(self) -> impl RangeBounds<Self> {
+        if self.len() == 0 {
+            VRange::from(self.start())..=VRange::from(self.start())
+        } else {
+            VRange::from(self.start())..=VRange::from(self.end() - 1)
+        }
+    }
+}
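
The `address` module replaces the old `vrange.rs` and wraps physical/virtual addresses and frame/page numbers in distinct newtypes, so every unit change becomes an explicit conversion (and converting an unaligned address into a frame number trips the `assert_eq!` on `page_offset()`). A small worked example with illustrative values:

```rust
use crate::kernel::mem::{PAddr, VAddr, PFN};

fn address_examples() {
    let paddr = PAddr(0x0021_3000);
    assert_eq!(paddr.floor_pfn(), PFN(0x213)); // 0x213000 >> 12
    assert_eq!(PAddr::from(PFN(0x213)), PAddr(0x0021_3000));

    let vaddr = VAddr(0x7fff_ffff_e123);
    assert!(vaddr.is_user()); // non-null and below 0x8000_0000_0000
    assert_eq!(vaddr.page_offset(), 0x123);
    assert_eq!(vaddr.floor(), VAddr(0x7fff_ffff_e000));
    assert_eq!(vaddr.ceil(), VAddr(0x7fff_ffff_f000));
}
```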

+ 0 - 323
src/kernel/mem/mm_list.cc

@@ -1,323 +0,0 @@
-#include <assert.h>
-#include <errno.h>
-#include <stdint.h>
-
-#include <kernel/mem/mm_list.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/mem/vm_area.hpp>
-
-using namespace kernel::mem;
-
-static inline void __invalidate_all_tlb() {
-    asm volatile(
-        "mov %%cr3, %%rax\n\t"
-        "mov %%rax, %%cr3\n\t"
-        :
-        :
-        : "rax", "memory");
-}
-
-static inline void __dealloc_page_table_all(paging::pfn_t pt, int depth, int from, int to) {
-    using namespace paging;
-
-    if (depth > 1) {
-        for (int i = from; i < to; ++i) {
-            auto pse = PSE{pt}[i];
-            if (!(pse.attributes() & PA_P))
-                continue;
-
-            int pfn = pse.pfn();
-            __dealloc_page_table_all(pfn, depth - 1, 0, 512);
-        }
-    }
-
-    free_page(pt);
-}
-
-static inline void __dealloc_page_table(paging::pfn_t pt) {
-    using namespace paging;
-    auto start_idx = idx_p4(0);
-    auto end_idx = idx_p4(KERNEL_SPACE_START);
-
-    __dealloc_page_table_all(pt, 4, start_idx, end_idx);
-}
-
-mm_list::mm_list() : m_pt{paging::alloc_page_table()}, m_brk{m_areas.end()} {
-    // copy only kernel space
-    memcpy(physaddr<void>{m_pt + 0x800}, physaddr<void>{KERNEL_PML4 + 0x800}, 0x800);
-}
-
-mm_list::mm_list(const mm_list& other) : mm_list{} {
-    m_areas = other.m_areas;
-
-    using namespace paging;
-    for (auto iter = m_areas.begin(); iter != m_areas.end(); ++iter) {
-        auto& area = *iter;
-
-        if (area.flags & MM_BREAK)
-            m_brk = iter;
-
-        auto this_iter = vaddr_range{m_pt, area.start, area.end};
-        auto other_iter = vaddr_range{other.m_pt, area.start, area.end};
-
-        while (this_iter) {
-            auto this_pte = *this_iter, other_pte = *other_iter;
-            auto attributes = other_pte.attributes();
-            auto pfn = other_pte.pfn();
-
-            attributes &= ~(PA_RW | PA_A | PA_D);
-            attributes |= PA_COW;
-            this_pte.set(attributes, pfn);
-
-            increase_refcount(pfn_to_page(pfn));
-
-            // TODO: create a function to set COW mappings
-            attributes = other_pte.attributes();
-            attributes &= ~PA_RW;
-            attributes |= PA_COW;
-            other_pte.set(attributes, pfn);
-
-            ++this_iter, ++other_iter;
-        }
-    }
-
-    __invalidate_all_tlb();
-}
-
-mm_list::~mm_list() {
-    if (!m_pt)
-        return;
-
-    clear();
-    __dealloc_page_table(m_pt);
-}
-
-bool mm_list::is_avail(uintptr_t start, std::size_t len) const noexcept {
-    start &= ~0xfff;
-    uintptr_t end = (start + len + 0xfff) & ~0xfff;
-    len = end - start;
-
-    if (end > USER_SPACE_MEMORY_TOP)
-        return false;
-
-    for (const auto& area : m_areas) {
-        if (!area.is_avail(start, end))
-            return false;
-    }
-    return true;
-}
-
-bool mm_list::is_avail(uintptr_t addr) const {
-    if (addr >= USER_SPACE_MEMORY_TOP)
-        return false;
-
-    auto iter = m_areas.find(addr);
-    return iter == m_areas.end();
-}
-
-uintptr_t mm_list::find_avail(uintptr_t hint, size_t len) const {
-    auto addr = std::max(hint, MMAP_MIN_ADDR);
-
-    while (!is_avail(addr, len)) {
-        auto iter = m_areas.lower_bound(addr);
-        if (iter == m_areas.end())
-            return 0;
-
-        addr = iter->end;
-    }
-
-    return addr;
-}
-
-void mm_list::switch_pd() const noexcept {
-    asm volatile("mov %0, %%cr3" : : "r"(m_pt) : "memory");
-}
-
-int mm_list::register_brk(uintptr_t addr) {
-    assert(m_brk == m_areas.end());
-    if (!is_avail(addr))
-        return -ENOMEM;
-
-    bool inserted;
-    std::tie(m_brk, inserted) = m_areas.emplace(addr, MM_ANONYMOUS | MM_WRITE | MM_BREAK);
-
-    assert(inserted);
-    return 0;
-}
-
-uintptr_t mm_list::set_brk(uintptr_t addr) {
-    using namespace paging;
-    assert(m_brk != m_areas.end());
-    uintptr_t curbrk = m_brk->end;
-
-    addr += 4096 - 1;
-    addr &= ~0xfff;
-
-    if (addr <= curbrk || !is_avail(curbrk, addr - curbrk))
-        return curbrk;
-
-    for (auto pte : vaddr_range{m_pt, curbrk, addr})
-        pte.set(PA_ANONYMOUS_PAGE | PA_NXE, EMPTY_PAGE_PFN);
-
-    m_brk->end = addr;
-    return m_brk->end;
-}
-
-void mm_list::clear() {
-    for (auto iter = m_areas.begin(); iter != m_areas.end(); ++iter)
-        unmap(iter, false);
-
-    __invalidate_all_tlb();
-
-    m_areas.clear();
-    m_brk = m_areas.end();
-}
-
-mm_list::iterator mm_list::split(iterator area, uintptr_t addr) {
-    assert(!(addr & 0xfff));
-    assert(addr > area->start && addr < area->end);
-
-    std::size_t old_len = addr - area->start;
-    std::size_t new_file_offset = 0;
-
-    if (area->mapped_file)
-        new_file_offset = area->file_offset + old_len;
-
-    auto new_end = area->end;
-    area->end = addr;
-
-    auto [iter, inserted] =
-        m_areas.emplace(addr, area->flags, new_end, d_get(area->mapped_file), new_file_offset);
-
-    assert(inserted);
-    return iter;
-}
-
-int mm_list::unmap(iterator area, bool should_invalidate_tlb) {
-    using namespace paging;
-
-    bool should_use_invlpg = area->end - area->start <= 0x4000;
-    auto range = vaddr_range{m_pt, area->start, area->end};
-    uintptr_t cur_addr = area->start;
-
-    // TODO: write back dirty pages
-    for (auto pte : range) {
-        free_page(pte.pfn());
-        pte.clear();
-
-        if (should_invalidate_tlb && should_use_invlpg) {
-            asm volatile("invlpg (%0)" : : "r"(cur_addr) : "memory");
-            cur_addr += 0x1000;
-        }
-    }
-
-    if (should_invalidate_tlb && !should_use_invlpg)
-        __invalidate_all_tlb();
-
-    return 0;
-}
-
-int mm_list::unmap(uintptr_t start, std::size_t length, bool should_invalidate_tlb) {
-    // standard says that addr and len MUST be
-    // page-aligned or the call is invalid
-    if (start & 0xfff)
-        return -EINVAL;
-
-    uintptr_t end = (start + length + 0xfff) & ~0xfff;
-
-    // check address validity
-    if (end > KERNEL_SPACE_START)
-        return -EINVAL;
-    if (end > USER_SPACE_MEMORY_TOP)
-        return -ENOMEM;
-
-    auto iter = m_areas.lower_bound(start);
-    auto iter_end = m_areas.upper_bound(end);
-
-    // start <= iter <= end a.k.a. !(start > *iter) && !(*iter > end)
-    while (iter != iter_end) {
-        // start == iter:
-        // start is between (iter->start, iter->end)
-        //
-        // strip out the area before start
-        if (!(start < *iter) && start != iter->start)
-            iter = split(iter, start);
-
-        // iter.end <= end
-        // it is safe to unmap the area directly
-        if (*iter < end) {
-            if (int ret = unmap(iter, should_invalidate_tlb); ret != 0)
-                return ret;
-
-            iter = m_areas.erase(iter);
-            continue;
-        }
-
-        // end == iter:
-        // end is between [iter->start, iter->end)
-        //
-        // if end == iter->start, no need to strip the area
-        if (end == iter->start) {
-            ++iter;
-            continue;
-        }
-
-        (void)split(iter, end);
-        if (int ret = unmap(iter, should_invalidate_tlb); ret != 0)
-            return ret;
-
-        iter = m_areas.erase(iter);
-
-        // no need to check areas after this
-        break;
-    }
-
-    return 0;
-}
-
-int mm_list::mmap(const map_args& args) {
-    auto& vaddr = args.vaddr;
-    auto& length = args.length;
-    auto& file = args.file;
-    auto& foff = args.file_offset;
-    auto& flags = args.flags;
-
-    assert((vaddr & 0xfff) == 0 && (foff & 0xfff) == 0);
-    assert((length & 0xfff) == 0 && length != 0);
-
-    if (!is_avail(vaddr, length))
-        return -EEXIST;
-
-    using namespace kernel::mem::paging;
-
-    // PA_RW is set during page fault while PA_NXE is preserved
-    // so we set PA_NXE now
-    psattr_t attributes = PA_US;
-    if (!(flags & MM_EXECUTE))
-        attributes |= PA_NXE;
-
-    if (flags & MM_MAPPED) {
-        assert(file);
-
-        auto [area, inserted] =
-            m_areas.emplace(vaddr, flags & ~MM_INTERNAL_MASK, vaddr + length, d_get(file), foff);
-        assert(inserted);
-
-        attributes |= PA_MMAPPED_PAGE;
-        for (auto pte : vaddr_range{m_pt, vaddr, vaddr + length})
-            pte.set(attributes, EMPTY_PAGE_PFN);
-    } else if (flags & MM_ANONYMOUS) {
-        // private mapping of zero-filled pages
-        // TODO: shared mapping
-        auto [area, inserted] = m_areas.emplace(vaddr, (flags & ~MM_INTERNAL_MASK), vaddr + length);
-        assert(inserted);
-
-        attributes |= PA_ANONYMOUS_PAGE;
-        for (auto pte : vaddr_range{m_pt, vaddr, vaddr + length})
-            pte.set(attributes, EMPTY_PAGE_PFN);
-    } else {
-        return -EINVAL;
-    }
-
-    return 0;
-}

+ 4 - 1
src/kernel/mem/mm_list.rs

@@ -352,6 +352,9 @@ impl MMList {
 
 impl Drop for MMList {
     fn drop(&mut self) {
-        self.clear_user();
+        let inner = self.inner.get_mut();
+        assert!(inner.areas.is_empty());
+        assert_eq!(inner.break_start, None);
+        assert_eq!(inner.break_pos, None);
     }
 }
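
`Drop for MMList` no longer tears down the address space itself; it only asserts that the areas and brk bookkeeping were already cleared, which means the owner must call `clear_user()` explicitly before the list is dropped. A hedged sketch of the expected call order (the actual teardown call site is not part of this diff):

```rust
use crate::kernel::mem::MMList;

// Hypothetical teardown path: clear the user mappings first, then drop.
fn release_address_space(mut mm_list: MMList) {
    mm_list.clear_user(); // unmap all areas, reset break_start / break_pos
    drop(mm_list);        // the new Drop impl merely asserts everything is gone
}
```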

+ 383 - 0
src/kernel/mem/page_alloc.rs

@@ -0,0 +1,383 @@
+use super::address::{PAddr, PFN};
+use crate::intrusive_list::Link;
+use crate::{container_of, prelude::*};
+use bitflags::bitflags;
+use core::sync::atomic::Ordering;
+use core::{ptr::NonNull, sync::atomic::AtomicU32};
+use lazy_static::lazy_static;
+
+const MAX_PAGE_ORDER: u32 = 10;
+const PAGE_ARRAY: *mut Page = 0xffffff8040000000 as *mut Page;
+
+pub(super) type PagePtr = Ptr<Page>;
+
+#[repr(transparent)]
+pub struct Ptr<T>(Option<NonNull<T>>);
+
+impl<T> Clone for Ptr<T> {
+    fn clone(&self) -> Self {
+        Self(self.0)
+    }
+}
+
+impl<T> Copy for Ptr<T> {}
+
+impl<T> Ptr<T> {
+    pub const fn new(ptr: Option<NonNull<T>>) -> Self {
+        Self(ptr)
+    }
+
+    pub fn from_raw(ptr: *mut T) -> Self {
+        Self::new(NonNull::new(ptr))
+    }
+
+    pub fn null() -> Self {
+        Self::new(None)
+    }
+
+    pub fn is_none(&self) -> bool {
+        self.0.is_none()
+    }
+
+    pub fn is_some(&self) -> bool {
+        self.0.is_some()
+    }
+
+    pub fn as_ptr(&self) -> *mut T {
+        self.0.unwrap().as_ptr()
+    }
+
+    pub fn as_ref<'a>(&self) -> &'a T {
+        unsafe { &*self.as_ptr() }
+    }
+
+    pub fn as_mut<'a>(&self) -> &'a mut T {
+        unsafe { &mut *self.as_ptr() }
+    }
+}
+
+impl PagePtr {
+    pub unsafe fn increase_refcount(&self) -> u32 {
+        self.as_mut().increase_refcount()
+    }
+
+    pub unsafe fn decrease_refcount(&self) -> u32 {
+        self.as_mut().decrease_refcount()
+    }
+
+    pub unsafe fn load_refcount(&self) -> u32 {
+        self.as_ref().refcount.load(Ordering::Acquire)
+    }
+
+    fn get_order(&self) -> u32 {
+        self.as_ref().order
+    }
+
+    pub fn is_valid(&self, order: u32) -> bool {
+        self.is_some() && self.get_order() == order
+    }
+
+    fn offset(&self, count: usize) -> Self {
+        match self.0 {
+            Some(non_null_ptr) => {
+                let new_raw_ptr = unsafe { non_null_ptr.as_ptr().add(count) };
+                Self::from_raw(new_raw_ptr)
+            }
+            None => Self::null(),
+        }
+    }
+}
+
+impl Into<PFN> for PagePtr {
+    fn into(self) -> PFN {
+        unsafe { PFN::from(self.as_ptr().offset_from(PAGE_ARRAY) as usize) }
+    }
+}
+
+impl From<PFN> for PagePtr {
+    fn from(pfn: PFN) -> Self {
+        unsafe { Self::from_raw(PAGE_ARRAY.add(pfn.0)) }
+    }
+}
+
+bitflags! {
+    // TODO: Use atomic
+    struct PageFlags: usize {
+        const PRESENT = 1 << 0;
+        const LOCKED  = 1 << 1;
+        const BUDDY   = 1 << 2;
+        const SLAB    = 1 << 3;
+        const DIRTY   = 1 << 4;
+        const FREE    = 1 << 5;
+    }
+}
+
+pub(super) struct Page {
+    // Now only used for free page links in the buddy system.
+    // Can be used for LRU page swap in the future.
+    link: Link,
+    flags: PageFlags, // TODO: This should be atomic.
+    /// # Safety
+    /// This field is only used in buddy system, which is protected by the global lock.
+    order: u32,
+    refcount: AtomicU32,
+}
+
+struct FreeArea {
+    free_list: Link,
+    count: usize,
+}
+
+/// Safety: `Zone` is `Send` because the `PAGE_ARRAY` is shared between cores.
+unsafe impl Send for Zone {}
+// /// Safety: TODO
+// unsafe impl Sync for Zone {}
+
+struct Zone {
+    free_areas: [FreeArea; MAX_PAGE_ORDER as usize + 1],
+}
+
+impl Page {
+    fn set_flags(&mut self, flags: PageFlags) {
+        self.flags.insert(flags);
+    }
+
+    fn remove_flags(&mut self, flags: PageFlags) {
+        self.flags.remove(flags);
+    }
+
+    fn set_order(&mut self, order: u32) {
+        self.order = order;
+    }
+
+    unsafe fn increase_refcount(&mut self) -> u32 {
+        self.refcount.fetch_add(1, Ordering::Relaxed)
+    }
+
+    unsafe fn decrease_refcount(&mut self) -> u32 {
+        self.refcount.fetch_sub(1, Ordering::AcqRel)
+    }
+
+    pub fn is_buddy(&self) -> bool {
+        self.flags.contains(PageFlags::BUDDY)
+    }
+
+    #[allow(dead_code)]
+    pub fn is_slab(&self) -> bool {
+        self.flags.contains(PageFlags::SLAB)
+    }
+
+    pub fn is_present(&self) -> bool {
+        self.flags.contains(PageFlags::PRESENT)
+    }
+
+    pub fn is_free(&self) -> bool {
+        self.flags.contains(PageFlags::FREE)
+    }
+}
+
+impl FreeArea {
+    const fn new() -> Self {
+        Self {
+            free_list: Link::new(),
+            count: 0,
+        }
+    }
+
+    fn alloc_pages(&mut self) -> PagePtr {
+        if let Some(pages_link) = self.free_list.next_mut() {
+            assert_ne!(self.count, 0);
+
+            let pages_ptr = unsafe { container_of!(pages_link, Page, link) };
+            let pages_ptr = Ptr::from_raw(pages_ptr);
+
+            self.count -= 1;
+            pages_ptr.as_mut().remove_flags(PageFlags::FREE);
+            pages_link.remove();
+
+            pages_ptr
+        } else {
+            PagePtr::null()
+        }
+    }
+
+    fn add_pages(&mut self, pages_ptr: PagePtr) {
+        self.count += 1;
+        pages_ptr.as_mut().set_flags(PageFlags::FREE);
+        self.free_list.insert(&mut pages_ptr.as_mut().link)
+    }
+
+    fn del_pages(&mut self, pages_ptr: PagePtr) {
+        assert!(self.count >= 1 && pages_ptr.as_ref().is_free());
+        self.count -= 1;
+        pages_ptr.as_mut().remove_flags(PageFlags::FREE);
+        pages_ptr.as_mut().link.remove();
+    }
+}
+
+impl Zone {
+    const fn new() -> Self {
+        Self {
+            free_areas: [const { FreeArea::new() }; MAX_PAGE_ORDER as usize + 1],
+        }
+    }
+
+    fn alloc_pages(&mut self, order: u32) -> PagePtr {
+        for current_order in order..=MAX_PAGE_ORDER {
+            let pages_ptr = self.free_areas[current_order as usize].alloc_pages();
+            if pages_ptr.is_none() {
+                continue;
+            }
+
+            unsafe {
+                pages_ptr.as_mut().increase_refcount();
+            }
+            pages_ptr.as_mut().set_order(order);
+
+            if current_order > order {
+                self.expand(pages_ptr, current_order, order);
+            }
+            assert!(pages_ptr.as_ref().is_present() && !pages_ptr.as_ref().is_free());
+            return pages_ptr;
+        }
+        PagePtr::new(None)
+    }
+
+    fn expand(&mut self, pages_ptr: PagePtr, order: u32, target_order: u32) {
+        assert!(pages_ptr.is_some());
+        let mut offset = 1 << order;
+
+        for order in (target_order..order).rev() {
+            offset >>= 1;
+            let split_pages_ptr = pages_ptr.offset(offset);
+            split_pages_ptr.as_mut().set_order(order);
+            split_pages_ptr.as_mut().set_flags(PageFlags::BUDDY);
+            self.free_areas[order as usize].add_pages(split_pages_ptr);
+        }
+    }
+
+    fn free_pages(&mut self, mut pages_ptr: PagePtr, order: u32) {
+        assert_eq!(unsafe { pages_ptr.load_refcount() }, 0);
+        assert_eq!(pages_ptr.get_order(), order);
+
+        let mut pfn: PFN = pages_ptr.into();
+        let mut current_order = order;
+
+        while current_order < MAX_PAGE_ORDER {
+            let buddy_pfn = pfn.buddy_pfn(current_order);
+            let buddy_pages_ptr = PagePtr::from(buddy_pfn);
+
+            if !self.buddy_check(buddy_pages_ptr, current_order) {
+                break;
+            }
+
+            pages_ptr.as_mut().remove_flags(PageFlags::BUDDY);
+            buddy_pages_ptr.as_mut().remove_flags(PageFlags::BUDDY);
+            self.free_areas[current_order as usize].del_pages(buddy_pages_ptr);
+            pages_ptr = PagePtr::from(pfn.combined_pfn(buddy_pfn));
+            pages_ptr.as_mut().set_flags(PageFlags::BUDDY);
+            pfn = pfn.combined_pfn(buddy_pfn);
+            current_order += 1;
+        }
+
+        pages_ptr.as_mut().set_order(current_order);
+        self.free_areas[current_order as usize].add_pages(pages_ptr);
+    }
+
+    /// Checks whether a page is free and is a valid buddy.
+    /// We can coalesce a page with its buddy only if:
+    /// - the buddy is valid (present), and
+    /// - the buddy is currently in `free_areas`, and
+    /// - the page and its buddy have the same order, and
+    /// - the page and its buddy are in the same zone. // re-check once SMP lands
+    fn buddy_check(&self, pages_ptr: PagePtr, order: u32) -> bool {
+        if !pages_ptr.as_ref().is_present() {
+            return false;
+        }
+        if !(pages_ptr.as_ref().is_free()) {
+            return false;
+        }
+        if pages_ptr.as_ref().order != order {
+            return false;
+        }
+
+        assert_eq!(unsafe { pages_ptr.load_refcount() }, 0);
+        true
+    }
+
+    /// Only used on buddy initialization
+    fn create_pages(&mut self, start: usize, end: usize) {
+        let mut start_pfn = PAddr::from(start).ceil_pfn();
+        let end_pfn = PAddr::from(end).floor_pfn();
+
+        while start_pfn < end_pfn {
+            let mut order = usize::from(start_pfn).trailing_zeros().min(MAX_PAGE_ORDER);
+
+            while start_pfn + order as usize > end_pfn {
+                order -= 1;
+            }
+            let page_ptr: PagePtr = start_pfn.into();
+            page_ptr.as_mut().set_flags(PageFlags::BUDDY);
+            self.free_areas[order as usize].add_pages(page_ptr);
+            start_pfn = start_pfn + (1 << order) as usize;
+        }
+    }
+}
+
+lazy_static! {
+    static ref ZONE: Spin<Zone> = Spin::new(Zone::new());
+}
+
+pub(super) fn alloc_page() -> PagePtr {
+    ZONE.lock().alloc_pages(0)
+}
+
+pub(super) fn alloc_pages(order: u32) -> PagePtr {
+    ZONE.lock().alloc_pages(order)
+}
+
+pub(super) fn free_pages(page_ptr: PagePtr, order: u32) {
+    ZONE.lock().free_pages(page_ptr, order)
+}
+
+#[no_mangle]
+pub extern "C" fn mark_present(start: usize, end: usize) {
+    let mut start_pfn = PAddr::from(start).ceil_pfn();
+    let end_pfn = PAddr::from(end).floor_pfn();
+    while start_pfn < end_pfn {
+        PagePtr::from(start_pfn)
+            .as_mut()
+            .set_flags(PageFlags::PRESENT);
+        start_pfn = start_pfn + 1;
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn create_pages(start: usize, end: usize) {
+    ZONE.lock().create_pages(start, end);
+}
+
+#[no_mangle]
+pub extern "C" fn page_to_pfn(page: *const Page) -> usize {
+    unsafe { page.offset_from(PAGE_ARRAY) as usize }
+}
+
+#[no_mangle]
+pub extern "C" fn c_alloc_page() -> *const Page {
+    ZONE.lock().alloc_pages(0).as_ptr() as *const Page
+}
+
+#[no_mangle]
+pub extern "C" fn c_alloc_pages(order: u32) -> *const Page {
+    ZONE.lock().alloc_pages(order).as_ptr() as *const Page
+}
+
+#[no_mangle]
+pub extern "C" fn c_alloc_page_table() -> usize {
+    let pfn: PFN = ZONE.lock().alloc_pages(0).into();
+    let paddr: usize = usize::from(pfn) << 12;
+    unsafe {
+        core::ptr::write_bytes(paddr as *mut u8, 0, 4096);
+    }
+    paddr
+}
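
The coalescing loop in `Zone::free_pages` leans on the two `PFN` helpers from `address.rs`: a block's buddy differs from it only in bit `order`, and the merged block starts at the lower of the two frame numbers, which is exactly `pfn & buddy_pfn`. A small worked example for orders 0 and 1:

```rust
use crate::kernel::mem::PFN;

fn buddy_examples() {
    // Order 0: frames 0b1010 and 0b1011 are buddies; merging yields 0b1010.
    assert_eq!(PFN(0b1010).buddy_pfn(0), PFN(0b1011));
    assert_eq!(PFN(0b1011).combined_pfn(PFN(0b1010)), PFN(0b1010));

    // Order 1: the 2-page block at 0b1010 pairs with the one at 0b1000,
    // and the merged order-2 block starts at 0b1000.
    assert_eq!(PFN(0b1010).buddy_pfn(1), PFN(0b1000));
    assert_eq!(PFN(0b1010).combined_pfn(PFN(0b1000)), PFN(0b1000));
}
```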

+ 0 - 256
src/kernel/mem/paging.cc

@@ -1,256 +0,0 @@
-#include <assert.h>
-#include <string.h>
-
-#include <types/list.hpp>
-
-#include <kernel/async/lock.hpp>
-#include <kernel/log.hpp>
-#include <kernel/mem/paging.hpp>
-#include <kernel/mem/slab.hpp>
-#include <kernel/process.hpp>
-
-using namespace types::list;
-
-using namespace kernel::async;
-using namespace kernel::mem::paging;
-
-static struct zone_info {
-    page* next;
-    std::size_t count;
-} zones[52];
-
-static mutex zone_lock;
-
-constexpr unsigned _msb(std::size_t x) {
-    unsigned n = 0;
-    while (x >>= 1)
-        n++;
-    return n;
-}
-
-constexpr pfn_t buddy(pfn_t pfn, unsigned order) {
-    return pfn ^ (1 << (order + 12));
-}
-
-constexpr pfn_t parent(pfn_t pfn, unsigned order) {
-    return pfn & ~(1 << (order + 12));
-}
-
-// call with zone_lock held
-static inline void _zone_list_insert(unsigned order, page* zone) {
-    assert(zone->flags & PAGE_PRESENT && zone->flags & PAGE_BUDDY);
-    assert((zone->flags & 0xff) == 0);
-    zone->flags |= order;
-
-    zones[order].count++;
-    list_insert(&zones[order].next, zone);
-}
-
-// call with zone_lock held
-static inline void _zone_list_remove(unsigned order, page* zone) {
-    assert(zone->flags & PAGE_PRESENT && zone->flags & PAGE_BUDDY);
-    assert(zones[order].count > 0 && (zone->flags & 0xff) == order);
-    zone->flags &= ~0xff;
-
-    zones[order].count--;
-    list_remove(&zones[order].next, zone);
-}
-
-// call with zone_lock held
-static inline page* _zone_list_get(unsigned order) {
-    if (zones[order].count == 0)
-        return nullptr;
-
-    zones[order].count--;
-    auto* pg = list_get(&zones[order].next);
-
-    assert((pg->flags & 0xff) == order);
-    return pg;
-}
-
-// where order represents power of 2
-// call with zone_lock held
-static inline page* _create_zone(pfn_t pfn, unsigned order) {
-    page* zone = pfn_to_page(pfn);
-
-    assert(zone->flags & PAGE_PRESENT);
-    zone->flags |= PAGE_BUDDY;
-
-    _zone_list_insert(order, zone);
-    return zone;
-}
-
-// call with zone_lock held
-static inline void _split_zone(page* zone, unsigned order, unsigned target_order) {
-    while (order > target_order) {
-        pfn_t pfn = page_to_pfn(zone);
-        _create_zone(buddy(pfn, order - 1), order - 1);
-
-        order--;
-    }
-
-    zone->flags &= ~0xff;
-    zone->flags |= target_order;
-}
-
-// call with zone_lock held
-static inline page* _alloc_zone(unsigned order) {
-    for (unsigned i = order; i < 52; ++i) {
-        auto zone = _zone_list_get(i);
-        if (!zone)
-            continue;
-
-        zone->refcount++;
-
-        if (i > order)
-            _split_zone(zone, i, order);
-
-        assert(zone->flags & PAGE_PRESENT && zone->flags & PAGE_BUDDY);
-        return zone;
-    }
-
-    return nullptr;
-}
-
-constexpr uintptr_t _find_mid(uintptr_t l, uintptr_t r) {
-    if (l == r)
-        return l;
-    uintptr_t bit = 1 << _msb(l ^ r);
-
-    return (l & r & ~(bit - 1)) | bit;
-}
-
-static void _recur_create_zone(uintptr_t l, uintptr_t r) {
-    auto mid = _find_mid(l, r);
-    assert(l <= mid);
-
-    // empty zone
-    if (l == mid) {
-        assert(l == r);
-        return;
-    }
-
-    // create [l, r) directly
-    if (r == mid) {
-        auto diff = r - l;
-        int order = 0;
-        while ((1u << order) <= diff) {
-            while (!(diff & (1 << order)))
-                order++;
-            _create_zone(l << 12, order);
-
-            l += (1 << order);
-            diff &= ~(1 << order);
-        }
-
-        return;
-    }
-
-    // split into halves
-    _recur_create_zone(l, mid);
-    _recur_create_zone(mid, r);
-}
-
-void kernel::mem::paging::create_zone(uintptr_t start, uintptr_t end) {
-    start += (4096 - 1);
-    start >>= 12;
-    end >>= 12;
-
-    if (start >= end)
-        return;
-
-    lock_guard_irq lock{zone_lock};
-
-    _recur_create_zone(start, end);
-}
-
-void kernel::mem::paging::mark_present(uintptr_t start, uintptr_t end) {
-    start >>= 12;
-
-    end += (4096 - 1);
-    end >>= 12;
-
-    while (start < end)
-        PAGE_ARRAY[start++].flags |= PAGE_PRESENT;
-}
-
-page* kernel::mem::paging::alloc_pages(unsigned order) {
-    lock_guard_irq lock{zone_lock};
-    auto* zone = _alloc_zone(order);
-    if (!zone)
-        freeze();
-
-    return zone;
-}
-
-page* kernel::mem::paging::alloc_page() {
-    return alloc_pages(0);
-}
-
-pfn_t kernel::mem::paging::alloc_page_table() {
-    page* zone = alloc_page();
-    pfn_t pfn = page_to_pfn(zone);
-
-    memset(physaddr<void>{pfn}, 0x00, 0x1000);
-
-    return pfn;
-}
-
-void kernel::mem::paging::free_pages(page* pg, unsigned order) {
-    lock_guard_irq lock{zone_lock};
-    assert((pg->flags & 0xff) == order);
-
-    if (!(pg->flags & PAGE_BUDDY) || --pg->refcount)
-        return;
-
-    while (order < 52) {
-        pfn_t pfn = page_to_pfn(pg);
-        pfn_t buddy_pfn = buddy(pfn, order);
-        page* buddy_page = pfn_to_page(buddy_pfn);
-
-        if (!(buddy_page->flags & PAGE_BUDDY))
-            break;
-
-        if ((buddy_page->flags & 0xff) != order)
-            break;
-
-        if (buddy_page->refcount)
-            break;
-
-        _zone_list_remove(order, buddy_page);
-
-        if (buddy_page < pg)
-            std::swap(buddy_page, pg);
-
-        buddy_page->flags &= ~(PAGE_BUDDY | 0xff);
-        order++;
-    }
-
-    pg->flags &= ~0xff;
-    _zone_list_insert(order, pg);
-}
-
-void kernel::mem::paging::free_page(page* page) {
-    return free_pages(page, 0);
-}
-
-void kernel::mem::paging::free_pages(pfn_t pfn, unsigned order) {
-    return free_pages(pfn_to_page(pfn), order);
-}
-
-void kernel::mem::paging::free_page(pfn_t pfn) {
-    return free_page(pfn_to_page(pfn));
-}
-
-pfn_t kernel::mem::paging::page_to_pfn(page* _page) {
-    return (pfn_t)(_page - PAGE_ARRAY) * 0x1000;
-}
-
-page* kernel::mem::paging::pfn_to_page(pfn_t pfn) {
-    return PAGE_ARRAY + pfn / 0x1000;
-}
-
-void kernel::mem::paging::increase_refcount(page* pg) {
-    lock_guard_irq lock{zone_lock};
-    pg->refcount++;
-}

+ 25 - 89
src/kernel/mem/paging.rs

@@ -1,89 +1,32 @@
-use crate::bindings::root::kernel::mem::paging::{
-    alloc_page as c_alloc_page, alloc_pages as c_alloc_pages, free_pages as c_free_pages,
-    increase_refcount as c_increase_refcount, page as c_page, page_to_pfn as c_page_to_pfn,
-    pfn_to_page as c_pfn_to_page, PAGE_BUDDY,
-};
+use super::address::PFN;
+use super::page_alloc::{alloc_page, alloc_pages, free_pages, PagePtr};
+use super::phys::PhysPtr;
 use crate::bindings::root::EFAULT;
 use crate::io::{Buffer, FillResult};
 use crate::kernel::mem::phys;
 use core::fmt;
-use core::sync::atomic::{AtomicU64, Ordering};
-
-use super::phys::PhysPtr;
-
-fn msb(x: u64) -> u64 {
-    // What the ****.
-    let mut x = x;
-    x |= x >> 1;
-    x |= x >> 2;
-    x |= x >> 4;
-    x |= x >> 8;
-    x |= x >> 16;
-    x |= x >> 32;
-    x -= x >> 1;
-    x
-}
-
-fn msb_position(x: u64) -> Option<u32> {
-    if x == 0 {
-        return None;
-    }
-
-    let mut pos = 0;
-    let mut x = x;
-    if x >= 1 << 32 {
-        x >>= 32;
-        pos += 32;
-    }
-    if x >= 1 << 16 {
-        x >>= 16;
-        pos += 16;
-    }
-    if x >= 1 << 8 {
-        x >>= 8;
-        pos += 8;
-    }
-    if x >= 1 << 4 {
-        x >>= 4;
-        pos += 4;
-    }
-    if x >= 1 << 2 {
-        x >>= 2;
-        pos += 2;
-    }
-    if x >= 1 {
-        pos += 1;
-    }
-
-    Some(pos)
-}
 
 pub struct Page {
-    page_ptr: *mut c_page,
+    page_ptr: PagePtr,
     order: u32,
 }
 
 #[allow(dead_code)]
 impl Page {
     pub fn alloc_one() -> Self {
-        let page_ptr = unsafe { c_alloc_page() };
-
+        let page_ptr = alloc_page();
         Self { page_ptr, order: 0 }
     }
 
     pub fn alloc_many(order: u32) -> Self {
-        let page_ptr = unsafe { c_alloc_pages(order) };
-
+        let page_ptr = alloc_pages(order);
         Self { page_ptr, order }
     }
 
     /// Allocate a contiguous block of pages that can contain at least `count` pages.
     pub fn alloc_ceil(count: usize) -> Self {
         assert_ne!(count, 0);
-        let count_msb = msb(count as u64) as usize;
-        let order = msb_position((count + count_msb - 1) as u64)
-            .expect("`count` can't be 0, so can't `order`");
-
+        let order = count.next_power_of_two().trailing_zeros();
         Self::alloc_many(order)
     }
 
@@ -92,16 +35,11 @@ impl Page {
     /// # Safety
     /// Caller must ensure that the pfn is no longer referenced by any other code.
     pub unsafe fn take_pfn(pfn: usize, order: u32) -> Self {
-        let page_ptr = unsafe { c_pfn_to_page(pfn) };
+        let page_ptr: PagePtr = PFN::from(pfn >> 12).into();
 
         // Only buddy pages can be used here.
-        assert!(unsafe { page_ptr.as_ref().unwrap() }.flags & PAGE_BUDDY != 0);
-
-        // Check if the order is correct.
-        assert_eq!(
-            unsafe { page_ptr.as_ref().unwrap() }.flags & 0xff,
-            order as u64
-        );
+        // Also, check if the order is correct.
+        assert!(page_ptr.as_ref().is_buddy() && page_ptr.is_valid(order));
 
         Self { page_ptr, order }
     }
@@ -112,7 +50,7 @@ impl Page {
     /// Caller must ensure that `pfn` refers to a valid physical frame number with `refcount` > 0.
     pub unsafe fn from_pfn(pfn: usize, order: u32) -> Self {
         // SAFETY: `pfn` is a valid physical frame number with refcount > 0.
-        unsafe { Self::increase_refcount(pfn) };
+        Self::increase_refcount(pfn);
 
         // SAFETY: `pfn` has an increased refcount.
         unsafe { Self::take_pfn(pfn, order) }
@@ -121,9 +59,9 @@ impl Page {
     /// Consumes the `Page` and returns the physical frame number without dropping the reference
     /// count the page holds.
     pub fn into_pfn(self) -> usize {
-        let pfn = unsafe { c_page_to_pfn(self.page_ptr) };
+        let pfn: PFN = self.page_ptr.into();
         core::mem::forget(self);
-        pfn
+        usize::from(pfn) << 12
     }
 
     pub fn len(&self) -> usize {
@@ -131,7 +69,8 @@ impl Page {
     }
 
     pub fn as_phys(&self) -> usize {
-        unsafe { c_page_to_pfn(self.page_ptr) }
+        let pfn: PFN = self.page_ptr.into();
+        usize::from(pfn) << 12
     }
 
     pub fn as_cached(&self) -> phys::CachedPP {
@@ -153,23 +92,18 @@ impl Page {
     /// # Safety
     /// Caller must ensure that the page is properly freed.
     pub unsafe fn increase_refcount(pfn: usize) {
-        let page = unsafe { c_pfn_to_page(pfn) };
-
-        unsafe {
-            c_increase_refcount(page);
-        }
+        let page_ptr: PagePtr = PFN::from(pfn >> 12).into();
+        page_ptr.increase_refcount();
     }
 
     pub unsafe fn load_refcount(&self) -> usize {
-        AtomicU64::from_ptr(&mut (*self.page_ptr).refcount).load(Ordering::Acquire) as usize
+        self.page_ptr.load_refcount() as usize
     }
 }
 
 impl Clone for Page {
     fn clone(&self) -> Self {
-        unsafe {
-            c_increase_refcount(self.page_ptr);
-        }
+        unsafe { self.page_ptr.increase_refcount() };
 
         Self {
             page_ptr: self.page_ptr,
@@ -180,17 +114,19 @@ impl Clone for Page {
 
 impl Drop for Page {
     fn drop(&mut self) {
-        unsafe {
-            c_free_pages(self.page_ptr, self.order);
+        match unsafe { self.page_ptr.decrease_refcount() } {
+            0 => panic!("In-use page refcount is 0"),
+            1 => free_pages(self.page_ptr, self.order),
+            _ => {}
         }
     }
 }
 
 impl PartialEq for Page {
     fn eq(&self, other: &Self) -> bool {
-        assert!(self.page_ptr != other.page_ptr || self.order == other.order);
+        // assert!(self.page_ptr != other.page_ptr || self.order == other.order);
 
-        self.page_ptr == other.page_ptr
+        self.page_ptr.as_ptr() == other.page_ptr.as_ptr()
     }
 }
 

+ 3 - 3
src/kernel/mem/slab.cc

@@ -58,10 +58,10 @@ slab_head* _make_slab(uintptr_t start, std::size_t size) {
 }
 
 void _slab_add_page(slab_cache* cache) {
-    auto* new_page = paging::alloc_page();
-    auto new_page_pfn = paging::page_to_pfn(new_page);
+    auto new_page_pfn = page_to_pfn(c_alloc_page()) << 12;
 
-    new_page->flags |= paging::PAGE_SLAB;
+    // TODO!!!
+    // new_page->flags |= paging::PAGE_SLAB;
 
     auto* slab = _make_slab(new_page_pfn, cache->obj_size);
     slab->cache = cache;

+ 0 - 168
src/kernel/mem/vrange.rs

@@ -1,168 +0,0 @@
-use core::{
-    cmp::Ordering,
-    fmt::{self, Debug, Formatter},
-    ops::{Add, RangeBounds, Sub},
-};
-
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
-pub struct VAddr(pub usize);
-
-#[derive(Clone, Copy)]
-pub struct VRange {
-    start: VAddr,
-    end: VAddr,
-}
-
-const USER_SPACE_MEMORY_TOP: VAddr = VAddr(0x8000_0000_0000);
-
-impl VAddr {
-    pub const NULL: Self = Self(0);
-
-    pub fn floor(&self) -> Self {
-        VAddr(self.0 & !0xfff)
-    }
-
-    pub fn ceil(&self) -> Self {
-        VAddr((self.0 + 0xfff) & !0xfff)
-    }
-
-    pub fn is_user(&self) -> bool {
-        self.0 != 0 && self < &USER_SPACE_MEMORY_TOP
-    }
-}
-
-impl Sub for VAddr {
-    type Output = usize;
-
-    fn sub(self, rhs: Self) -> Self::Output {
-        self.0 - rhs.0
-    }
-}
-
-impl Add<usize> for VAddr {
-    type Output = Self;
-
-    fn add(self, rhs: usize) -> Self::Output {
-        VAddr(self.0 + rhs)
-    }
-}
-
-impl Sub<usize> for VAddr {
-    type Output = Self;
-
-    fn sub(self, rhs: usize) -> Self::Output {
-        VAddr(self.0 - rhs)
-    }
-}
-
-impl Debug for VAddr {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "V{:#x}", self.0)
-    }
-}
-
-impl Debug for VRange {
-    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
-        write!(f, "[{:?}, {:?})", self.start, self.end)
-    }
-}
-
-impl Eq for VRange {}
-impl PartialOrd for VRange {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl PartialEq for VRange {
-    fn eq(&self, other: &Self) -> bool {
-        self.cmp(other) == Ordering::Equal
-    }
-}
-
-/// Any two ranges that have one of them containing the other are considered equal.
-impl Ord for VRange {
-    fn cmp(&self, other: &Self) -> Ordering {
-        if self.start == other.start {
-            return Ordering::Equal;
-        }
-
-        if self.end == other.end {
-            if self.start == self.end {
-                return Ordering::Greater;
-            }
-            if other.start == other.end {
-                return Ordering::Less;
-            }
-            return Ordering::Equal;
-        }
-
-        if self.start < other.start {
-            if other.end < self.end {
-                return Ordering::Equal;
-            } else {
-                return Ordering::Less;
-            }
-        }
-
-        if other.start < self.start {
-            if self.end < other.end {
-                return Ordering::Equal;
-            } else {
-                return Ordering::Greater;
-            }
-        }
-
-        unreachable!()
-    }
-}
-
-impl From<VAddr> for VRange {
-    fn from(addr: VAddr) -> Self {
-        VRange::new(addr, addr)
-    }
-}
-
-impl VRange {
-    pub fn new(start: VAddr, end: VAddr) -> Self {
-        assert!(start <= end);
-        VRange { start, end }
-    }
-
-    pub fn is_overlapped(&self, other: &Self) -> bool {
-        self == other
-    }
-
-    pub fn is_user(&self) -> bool {
-        self.start < USER_SPACE_MEMORY_TOP && self.end <= USER_SPACE_MEMORY_TOP
-    }
-
-    pub fn start(&self) -> VAddr {
-        self.start
-    }
-
-    pub fn end(&self) -> VAddr {
-        self.end
-    }
-
-    pub fn len(&self) -> usize {
-        self.end.0 - self.start.0
-    }
-
-    pub fn shrink(&self, count: usize) -> Self {
-        assert!(count <= self.len());
-        VRange::new(self.start, self.end - count)
-    }
-
-    pub fn grow(&self, count: usize) -> Self {
-        VRange::new(self.start, self.end + count)
-    }
-
-    pub fn into_range(self) -> impl RangeBounds<Self> {
-        if self.len() == 0 {
-            VRange::from(self.start())..=VRange::from(self.start())
-        } else {
-            VRange::from(self.start())..=VRange::from(self.end() - 1)
-        }
-    }
-}
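
For context on the deleted vrange.rs above: its `Ord` treats two ranges as Equal whenever one contains the other, which is what lets a degenerate `[addr, addr)` range be used as a BTreeMap/BTreeSet key to find the mapping that covers an address. A self-contained sketch of the same trick follows; it is only sound while the stored ranges never overlap each other, since the relation is not a genuine total order across overlapping ranges.

use std::cmp::Ordering;
use std::collections::BTreeSet;

#[derive(Clone, Copy, Debug, Eq)]
struct VRange {
    start: usize,
    end: usize,
}

impl VRange {
    fn new(start: usize, end: usize) -> Self {
        assert!(start <= end);
        Self { start, end }
    }

    // A degenerate [addr, addr) range, used purely as a lookup key.
    fn point(addr: usize) -> Self {
        Self::new(addr, addr)
    }
}

impl PartialEq for VRange {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl PartialOrd for VRange {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

// Same decision tree as the deleted impl: containment counts as equality,
// and an empty range sitting exactly on an end point counts as "past" it.
impl Ord for VRange {
    fn cmp(&self, other: &Self) -> Ordering {
        if self.start == other.start {
            return Ordering::Equal;
        }
        if self.end == other.end {
            return if self.start == self.end {
                Ordering::Greater
            } else if other.start == other.end {
                Ordering::Less
            } else {
                Ordering::Equal
            };
        }
        if self.start < other.start {
            return if other.end < self.end {
                Ordering::Equal
            } else {
                Ordering::Less
            };
        }
        if self.end < other.end {
            Ordering::Equal
        } else {
            Ordering::Greater
        }
    }
}

fn main() {
    let mut areas = BTreeSet::new();
    areas.insert(VRange::new(0x1000, 0x3000));
    areas.insert(VRange::new(0x8000, 0x9000));

    // Found: 0x2000 lies inside [0x1000, 0x3000), so the point compares Equal to it.
    assert!(areas.get(&VRange::point(0x2000)).is_some());
    // Not found: 0x3000 is the exclusive end of the first area.
    assert!(areas.get(&VRange::point(0x3000)).is_none());
    println!("containment-as-equality lookup works");
}

With non-overlapping areas, `get(&VRange::point(addr))` compares Equal to exactly the area containing `addr` and Less/Greater to everything else, so the tree search lands on the right node.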

+ 11 - 9
src/kernel/smp.rs

@@ -5,6 +5,7 @@ use crate::{
     kernel::{
         cpu::current_cpu,
         mem::{paging::Page, phys::PhysPtr as _},
+        task::{Process, Thread},
     },
     println_debug,
     sync::preempt,
@@ -12,7 +13,7 @@ use crate::{
 
 use super::{
     cpu::init_thiscpu,
-    task::{ProcessList, Scheduler, Thread},
+    task::{ProcessList, Scheduler},
 };
 
 define_smp_bootstrap!(4, ap_entry, {
@@ -27,16 +28,17 @@ unsafe extern "C" fn ap_entry() {
     println_debug!("AP{} started", current_cpu().cpuid());
 
     {
-        let idle_process = ProcessList::get()
-            .try_find_process(0)
-            .expect("Idle process must exist");
+        let mut procs = ProcessList::get().lock_nosleep();
+        let idle_process = procs.idle_process().clone();
 
         let idle_thread_name = format!("[kernel idle#AP{}]", 0);
-        let idle_thread =
-            Thread::new_for_init(Arc::from(idle_thread_name.as_bytes()), &idle_process);
-        ProcessList::get().add_thread(&idle_thread);
-        Scheduler::set_idle(idle_thread.clone());
-        Scheduler::set_current(idle_thread);
+        let idle_thread = Thread::new_for_init(
+            Arc::from(idle_thread_name.as_bytes()),
+            Process::alloc_pid(),
+            &idle_process,
+            procs.as_mut(),
+        );
+        Scheduler::set_idle_and_current(idle_thread);
     }
 
     preempt::disable();

+ 24 - 16
src/kernel/syscall/procops.rs

@@ -15,7 +15,7 @@ use crate::kernel::user::dataflow::UserString;
 use crate::kernel::user::{UserPointer, UserPointerMut};
 use crate::kernel::vfs::dentry::Dentry;
 use crate::path::Path;
-use crate::sync::preempt;
+use crate::sync::{preempt, AsRefPosition as _};
 use crate::{kernel::user::dataflow::UserBuffer, prelude::*};
 
 use crate::kernel::vfs::{self, FsContext};
@@ -152,7 +152,10 @@ fn sys_execve(int_stack: &mut InterruptContext, _mmxregs: &mut mmx_registers) ->
 // TODO: Find a better way.
 #[allow(unreachable_code)]
 fn do_exit(status: u32) -> KResult<()> {
-    ProcessList::get().do_kill_process(&Thread::current().process, WaitType::Exited(status));
+    {
+        let mut procs = ProcessList::get().lock();
+        procs.do_kill_process(&Thread::current().process, WaitType::Exited(status));
+    }
     Scheduler::schedule_noreturn();
     panic!("schedule_noreturn returned!");
 }
@@ -217,22 +220,24 @@ fn do_setpgid(pid: u32, pgid: i32) -> KResult<()> {
 
 fn do_getsid(pid: u32) -> KResult<u32> {
     if pid == 0 {
-        Ok(Thread::current().process.sid())
+        Ok(Thread::current().process.session_rcu().sid)
     } else {
-        ProcessList::get()
+        let procs = ProcessList::get().lock_shared();
+        procs
             .try_find_process(pid)
-            .map(|proc| proc.sid())
+            .map(|proc| proc.session(procs.as_pos()).sid)
             .ok_or(ESRCH)
     }
 }
 
 fn do_getpgid(pid: u32) -> KResult<u32> {
     if pid == 0 {
-        Ok(Thread::current().process.pgid())
+        Ok(Thread::current().process.pgroup_rcu().pgid)
     } else {
-        ProcessList::get()
+        let procs = ProcessList::get().lock_shared();
+        procs
             .try_find_process(pid)
-            .map(|proc| proc.pgid())
+            .map(|proc| proc.pgroup(procs.as_pos()).pgid)
             .ok_or(ESRCH)
     }
 }
@@ -242,7 +247,7 @@ fn do_getpid() -> KResult<u32> {
 }
 
 fn do_getppid() -> KResult<u32> {
-    Ok(Thread::current().process.parent().map_or(0, |x| x.pid))
+    Ok(Thread::current().process.parent_rcu().map_or(0, |x| x.pid))
 }
 
 fn do_getuid() -> KResult<u32> {
@@ -307,6 +312,7 @@ fn do_prctl(option: u32, arg2: usize) -> KResult<()> {
 }
 
 fn do_kill(pid: i32, sig: u32) -> KResult<()> {
+    let procs = ProcessList::get().lock_shared();
     match pid {
         // Send signal to every process for which the calling process has
         // permission to send signals.
@@ -314,18 +320,18 @@ fn do_kill(pid: i32, sig: u32) -> KResult<()> {
         // Send signal to every process in the process group.
         0 => Thread::current()
             .process
-            .pgroup()
-            .raise(Signal::try_from(sig)?),
+            .pgroup(procs.as_pos())
+            .raise(Signal::try_from(sig)?, procs.as_pos()),
         // Send signal to the process with the specified pid.
-        1.. => ProcessList::get()
+        1.. => procs
             .try_find_process(pid as u32)
             .ok_or(ESRCH)?
-            .raise(Signal::try_from(sig)?),
+            .raise(Signal::try_from(sig)?, procs.as_pos()),
         // Send signal to the process group with the specified pgid equals to `-pid`.
-        ..-1 => ProcessList::get()
+        ..-1 => procs
             .try_find_pgroup((-pid) as u32)
             .ok_or(ESRCH)?
-            .raise(Signal::try_from(sig)?),
+            .raise(Signal::try_from(sig)?, procs.as_pos()),
     }
 
     Ok(())
@@ -333,6 +339,7 @@ fn do_kill(pid: i32, sig: u32) -> KResult<()> {
 
 fn do_tkill(tid: u32, sig: u32) -> KResult<()> {
     ProcessList::get()
+        .lock_shared()
         .try_find_thread(tid)
         .ok_or(ESRCH)?
         .raise(Signal::try_from(sig)?);
@@ -452,7 +459,8 @@ define_syscall32!(sys_rt_sigaction, do_rt_sigaction,
     signum: u32, act: *const UserSignalAction, oldact: *mut UserSignalAction, sigsetsize: usize);
 
 fn sys_fork(int_stack: &mut InterruptContext, _mmxregs: &mut mmx_registers) -> usize {
-    let new_thread = Thread::new_cloned(&Thread::current());
+    let mut procs = ProcessList::get().lock();
+    let new_thread = Thread::current().new_cloned(procs.as_mut());
     let mut new_int_stack = int_stack.clone();
     new_int_stack.rax = 0;
     new_int_stack.eflags = 0x200;

+ 9 - 4
src/kernel/task.rs

@@ -1,13 +1,18 @@
 mod kstack;
+mod process;
+mod process_group;
+mod process_list;
 mod scheduler;
+mod session;
 mod signal;
 mod thread;
 
 pub(self) use kstack::KernelStack;
 
+pub use process::{Process, WaitObject, WaitType};
+pub use process_group::ProcessGroup;
+pub use process_list::{init_multitasking, ProcessList};
 pub use scheduler::Scheduler;
+pub use session::Session;
 pub use signal::{Signal, SignalAction};
-pub use thread::{
-    init_multitasking, ProcessList, Session, Thread, ThreadState, UserDescriptor, WaitObject,
-    WaitType,
-};
+pub use thread::{Thread, ThreadState, UserDescriptor};

+ 511 - 0
src/kernel/task/process.rs

@@ -0,0 +1,511 @@
+use core::{
+    ptr::addr_of,
+    sync::atomic::{AtomicU32, Ordering},
+};
+
+use alloc::{
+    collections::{btree_map::BTreeMap, vec_deque::VecDeque},
+    sync::{Arc, Weak},
+};
+use bindings::{ECHILD, EINTR, EPERM, ESRCH};
+
+use crate::{
+    kernel::mem::MMList,
+    prelude::*,
+    rcu::{rcu_sync, RCUPointer, RCUReadGuard},
+    sync::{
+        AsRefMutPosition as _, AsRefPosition as _, CondVar, RefMutPosition, RefPosition,
+        RwSemReadGuard, SpinGuard,
+    },
+};
+
+use super::{signal::RaiseResult, ProcessGroup, ProcessList, Session, Signal, Thread};
+
+#[derive(Debug)]
+pub struct Process {
+    /// Process id
+    ///
+    /// This should never change during the life of the process.
+    pub pid: u32,
+
+    pub wait_list: WaitList,
+    pub mm_list: Arc<MMList>,
+
+    /// Parent process
+    ///
+    /// `parent` must be valid during the whole life of the process.
+    /// The only case where it may be `None` is when it is the init process
+    /// or the process is a kernel thread.
+    pub(super) parent: RCUPointer<Process>,
+
+    /// Process group
+    ///
+    /// `pgroup` must be valid during the whole life of the process.
+    /// The only case where it may be `None` is when the process is a kernel thread.
+    pub(super) pgroup: RCUPointer<ProcessGroup>,
+
+    /// Session
+    ///
+    /// `session` must be valid during the whole life of the process.
+    /// The only case where it may be `None` is when the process is a kernel thread.
+    pub(super) session: RCUPointer<Session>,
+
+    /// All things related to the process list.
+    pub(super) inner: Locked<ProcessInner, ProcessList>,
+}
+
+#[derive(Debug)]
+pub(super) struct ProcessInner {
+    pub(super) children: BTreeMap<u32, Weak<Process>>,
+    pub(super) threads: BTreeMap<u32, Weak<Thread>>,
+}
+
+#[derive(Debug)]
+pub struct WaitList {
+    wait_procs: Spin<VecDeque<WaitObject>>,
+    cv_wait_procs: CondVar,
+}
+
+pub struct NotifyBatch<'waitlist, 'process, 'cv> {
+    wait_procs: SpinGuard<'waitlist, VecDeque<WaitObject>>,
+    process: &'process Process,
+    cv: &'cv CondVar,
+    needs_notify: bool,
+}
+
+pub struct Entry<'waitlist, 'proclist, 'cv> {
+    wait_procs: SpinGuard<'waitlist, VecDeque<WaitObject>>,
+    process_list: RwSemReadGuard<'proclist, ProcessList>,
+    cv: &'cv CondVar,
+    want_stop: bool,
+    want_continue: bool,
+}
+
+pub struct DrainExited<'waitlist> {
+    wait_procs: SpinGuard<'waitlist, VecDeque<WaitObject>>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum WaitType {
+    Exited(u32),
+    Signaled(Signal),
+    Stopped(Signal),
+    Continued,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct WaitObject {
+    pub pid: u32,
+    pub code: WaitType,
+}
+
+impl WaitType {
+    pub fn to_wstatus(self) -> u32 {
+        match self {
+            WaitType::Exited(status) => (status & 0xff) << 8,
+            WaitType::Signaled(signal) if signal.is_coredump() => signal.to_signum() | 0x80,
+            WaitType::Signaled(signal) => signal.to_signum(),
+            WaitType::Stopped(signal) => 0x7f | (signal.to_signum() << 8),
+            WaitType::Continued => 0xffff,
+        }
+    }
+}
+
+impl WaitObject {
+    pub fn stopped(&self) -> Option<Signal> {
+        if let WaitType::Stopped(signal) = self.code {
+            Some(signal)
+        } else {
+            None
+        }
+    }
+
+    pub fn is_continue(&self) -> bool {
+        matches!(self.code, WaitType::Continued)
+    }
+}
+
+/// PIDs 0 and 1 are created manually, so we start from 2.
+static NEXT_PID: AtomicU32 = AtomicU32::new(2);
+impl Process {
+    pub fn alloc_pid() -> u32 {
+        NEXT_PID.fetch_add(1, Ordering::Relaxed)
+    }
+
+    pub fn new_cloned(other: &Arc<Self>, procs: &mut ProcessList) -> Arc<Self> {
+        let procs_addr = addr_of!(*procs);
+
+        // SAFETY: We are holding the process list lock.
+        let other_pgroup = unsafe { other.pgroup.load_locked().unwrap() };
+        let other_session = unsafe { other.session.load_locked().unwrap() };
+
+        let process = Arc::new(Self {
+            pid: Self::alloc_pid(),
+            wait_list: WaitList::new(),
+            mm_list: MMList::new_cloned(&other.mm_list),
+            parent: RCUPointer::new_with(other.clone()),
+            pgroup: RCUPointer::new_with(other_pgroup.clone()),
+            session: RCUPointer::new_with(other_session.clone()),
+            inner: Locked::new(
+                ProcessInner {
+                    children: BTreeMap::new(),
+                    threads: BTreeMap::new(),
+                },
+                procs_addr,
+            ),
+        });
+
+        procs.add_process(&process);
+        other.add_child(&process, procs.as_pos_mut());
+        other_pgroup.add_member(&process, procs.as_pos_mut());
+        process
+    }
+
+    pub(super) unsafe fn new_for_init(pid: u32, procs: &mut ProcessList) -> Arc<Self> {
+        Arc::new(Self {
+            pid,
+            wait_list: WaitList::new(),
+            mm_list: MMList::new(),
+            parent: RCUPointer::empty(),
+            pgroup: RCUPointer::empty(),
+            session: RCUPointer::empty(),
+            inner: Locked::new(
+                ProcessInner {
+                    children: BTreeMap::new(),
+                    threads: BTreeMap::new(),
+                },
+                procs,
+            ),
+        })
+    }
+
+    pub fn raise(&self, signal: Signal, procs: RefPosition<'_, ProcessList>) {
+        let inner = self.inner.access(procs);
+        for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
+            if let RaiseResult::Finished = thread.raise(signal) {
+                break;
+            }
+        }
+    }
+
+    pub(super) fn add_child(&self, child: &Arc<Process>, procs: RefMutPosition<'_, ProcessList>) {
+        assert!(self
+            .inner
+            .access_mut(procs)
+            .children
+            .insert(child.pid, Arc::downgrade(child))
+            .is_none());
+    }
+
+    pub(super) fn add_thread(&self, thread: &Arc<Thread>, procs: RefMutPosition<'_, ProcessList>) {
+        assert!(self
+            .inner
+            .access_mut(procs)
+            .threads
+            .insert(thread.tid, Arc::downgrade(thread))
+            .is_none());
+    }
+
+    pub fn wait(
+        &self,
+        no_block: bool,
+        trace_stop: bool,
+        trace_continue: bool,
+    ) -> KResult<Option<WaitObject>> {
+        let wait_object = {
+            let mut waits = self.wait_list.entry(trace_stop, trace_continue);
+            loop {
+                if let Some(object) = waits.get() {
+                    break object;
+                }
+
+                if self
+                    .inner
+                    .access(waits.process_list.as_pos())
+                    .children
+                    .is_empty()
+                {
+                    return Err(ECHILD);
+                }
+
+                if no_block {
+                    return Ok(None);
+                }
+
+                waits.wait()?;
+            }
+        };
+
+        if wait_object.stopped().is_some() || wait_object.is_continue() {
+            Ok(Some(wait_object))
+        } else {
+            let mut procs = ProcessList::get().lock();
+            procs.remove_process(wait_object.pid);
+            assert!(self
+                .inner
+                .access_mut(procs.as_pos_mut())
+                .children
+                .remove(&wait_object.pid)
+                .is_some());
+
+            Ok(Some(wait_object))
+        }
+    }
+
+    /// Create a new session for the process.
+    pub fn setsid(self: &Arc<Self>) -> KResult<u32> {
+        let mut procs = ProcessList::get().lock();
+        // If there exists a session that has the same sid as our pid, we can't create a new
+        // session. The standard says that we should create a new process group and be the
+        // only process in the new process group and session.
+        if procs.try_find_session(self.pid).is_some() {
+            return Err(EPERM);
+        }
+        let session = Session::new(procs.as_mut(), self);
+        let pgroup = session.new_group(procs.as_mut(), self);
+
+        {
+            let _old_session = unsafe { self.session.swap(Some(session.clone())) }.unwrap();
+            let old_pgroup = unsafe { self.pgroup.swap(Some(pgroup.clone())) }.unwrap();
+            old_pgroup.remove_member(self.pid, procs.as_pos_mut());
+            rcu_sync();
+        }
+
+        Ok(pgroup.pgid)
+    }
+
+    /// Set the process group id of the process to `pgid`.
+    ///
+    /// This function does the actual work.
+    fn do_setpgid(self: &Arc<Self>, pgid: u32, procs: &mut ProcessList) -> KResult<()> {
+        // SAFETY: We are holding the process list lock.
+        let session = unsafe { self.session.load_locked().unwrap() };
+        let pgroup = unsafe { self.pgroup.load_locked().unwrap() };
+
+        // Changing the process group of a session leader is not allowed.
+        if session.sid == self.pid {
+            return Err(EPERM);
+        }
+
+        let new_pgroup = if let Some(new_pgroup) = procs.try_find_pgroup(pgid) {
+            // Move us to an existing process group.
+            // Check that the two groups are in the same session.
+            if new_pgroup.session.upgrade().unwrap().sid != session.sid {
+                return Err(EPERM);
+            }
+
+            // If we are already in the process group, we are done.
+            if new_pgroup.pgid == pgroup.pgid {
+                return Ok(());
+            }
+
+            new_pgroup
+        } else {
+            // Create a new process group only if `pgid` matches our `pid`.
+            if pgid != self.pid {
+                return Err(EPERM);
+            }
+
+            session.new_group(procs, self)
+        };
+
+        pgroup.remove_member(self.pid, procs.as_pos_mut());
+        {
+            let _old_pgroup = unsafe { self.pgroup.swap(Some(new_pgroup)) }.unwrap();
+            rcu_sync();
+        }
+
+        Ok(())
+    }
+
+    /// Set the process group id of the process `pid` to `pgid`.
+    ///
+    /// This function should be called on the process that issued the syscall in order to do
+    /// permission checks.
+    pub fn setpgid(self: &Arc<Self>, pid: u32, pgid: u32) -> KResult<()> {
+        let mut procs = ProcessList::get().lock();
+        // We may set pgid of either the calling process or a child process.
+        if pid == self.pid {
+            self.do_setpgid(pgid, procs.as_mut())
+        } else {
+            let child = {
+                // If `pid` refers to one of our children, the thread leaders must be
+                // in our children list.
+                let children = &self.inner.access(procs.as_pos()).children;
+                let child = {
+                    let child = children.get(&pid);
+                    child.and_then(Weak::upgrade).ok_or(ESRCH)?
+                };
+
+                // Changing the process group of a child is only allowed
+                // if we are in the same session.
+                if child.session(procs.as_pos()).sid != self.session(procs.as_pos()).sid {
+                    return Err(EPERM);
+                }
+
+                child
+            };
+
+            // TODO: Check whether we, as a child, have already performed an `execve`.
+            //       If so, we should return `Err(EACCES)`.
+            child.do_setpgid(pgid, procs.as_mut())
+        }
+    }
+
+    /// Provide locked (consistent) access to the session.
+    pub fn session<'r>(&'r self, _procs: RefPosition<'r, ProcessList>) -> BorrowedArc<'r, Session> {
+        // SAFETY: We are holding the process list lock.
+        unsafe { self.session.load_locked() }.unwrap()
+    }
+
+    /// Provide locked (consistent) access to the process group.
+    pub fn pgroup<'r>(
+        &'r self,
+        _procs: RefPosition<'r, ProcessList>,
+    ) -> BorrowedArc<'r, ProcessGroup> {
+        // SAFETY: We are holding the process list lock.
+        unsafe { self.pgroup.load_locked() }.unwrap()
+    }
+
+    /// Provide locked (consistent) access to the parent process.
+    pub fn parent<'r>(&'r self, _procs: RefPosition<'r, ProcessList>) -> BorrowedArc<'r, Process> {
+        // SAFETY: We are holding the process list lock.
+        unsafe { self.parent.load_locked() }.unwrap()
+    }
+
+    /// Provide RCU locked (maybe inconsistent) access to the session.
+    pub fn session_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<Session>> {
+        self.session.load().unwrap()
+    }
+
+    /// Provide RCU locked (maybe inconsistent) access to the process group.
+    pub fn pgroup_rcu(&self) -> RCUReadGuard<'_, BorrowedArc<ProcessGroup>> {
+        self.pgroup.load().unwrap()
+    }
+
+    /// Provide RCU locked (maybe inconsistent) access to the parent process.
+    pub fn parent_rcu(&self) -> Option<RCUReadGuard<'_, BorrowedArc<Process>>> {
+        self.parent.load()
+    }
+
+    pub fn notify(&self, wait: WaitObject, procs: RefPosition<'_, ProcessList>) {
+        self.wait_list.notify(wait);
+        self.raise(Signal::SIGCHLD, procs);
+    }
+
+    pub fn notify_batch(&self) -> NotifyBatch<'_, '_, '_> {
+        NotifyBatch {
+            wait_procs: self.wait_list.wait_procs.lock(),
+            process: self,
+            cv: &self.wait_list.cv_wait_procs,
+            needs_notify: false,
+        }
+    }
+}
+
+impl WaitList {
+    pub fn new() -> Self {
+        Self {
+            wait_procs: Spin::new(VecDeque::new()),
+            cv_wait_procs: CondVar::new(),
+        }
+    }
+
+    fn notify(&self, wait: WaitObject) {
+        let mut wait_procs = self.wait_procs.lock();
+        wait_procs.push_back(wait);
+        self.cv_wait_procs.notify_all();
+    }
+
+    pub fn drain_exited(&self) -> DrainExited {
+        DrainExited {
+            wait_procs: self.wait_procs.lock(),
+        }
+    }
+
+    /// # Safety
+    /// Locks `ProcessList` and `WaitList` at the same time. When `wait` is called,
+    /// it releases both locks and waits on `cv_wait_procs`.
+    pub fn entry(&self, want_stop: bool, want_continue: bool) -> Entry {
+        Entry {
+            process_list: ProcessList::get().lock_shared(),
+            wait_procs: self.wait_procs.lock(),
+            cv: &self.cv_wait_procs,
+            want_stop,
+            want_continue,
+        }
+    }
+}
+
+impl Entry<'_, '_, '_> {
+    pub fn get(&mut self) -> Option<WaitObject> {
+        if let Some(idx) = self
+            .wait_procs
+            .iter()
+            .enumerate()
+            .filter(|(_, item)| {
+                if item.stopped().is_some() {
+                    self.want_stop
+                } else if item.is_continue() {
+                    self.want_continue
+                } else {
+                    true
+                }
+            })
+            .map(|(idx, _)| idx)
+            .next()
+        {
+            Some(self.wait_procs.remove(idx).unwrap())
+        } else {
+            None
+        }
+    }
+
+    pub fn wait(&mut self) -> KResult<()> {
+        // SAFETY: We will lock it again after returning from `cv.wait`.
+        unsafe { self.wait_procs.force_unlock() };
+
+        self.cv.wait(&mut self.process_list);
+
+        // SAFETY: We will lock it again.
+        unsafe { self.wait_procs.force_relock() };
+
+        if Thread::current().signal_list.has_pending_signal() {
+            return Err(EINTR);
+        }
+        Ok(())
+    }
+}
+
+impl DrainExited<'_> {
+    pub fn into_iter(&mut self) -> impl Iterator<Item = WaitObject> + '_ {
+        // We don't propagate stop and continue to the new parent.
+        self.wait_procs
+            .drain(..)
+            .filter(|item| item.stopped().is_none() && !item.is_continue())
+    }
+}
+
+impl NotifyBatch<'_, '_, '_> {
+    pub fn notify(&mut self, wait: WaitObject) {
+        self.needs_notify = true;
+        self.wait_procs.push_back(wait);
+    }
+
+    /// Finish the batch and notify all if we have notified some processes.
+    pub fn finish(mut self, procs: RefPosition<'_, ProcessList>) {
+        if self.needs_notify {
+            self.cv.notify_all();
+            self.process.raise(Signal::SIGCHLD, procs);
+            self.needs_notify = false;
+        }
+    }
+}
+
+impl Drop for NotifyBatch<'_, '_, '_> {
+    fn drop(&mut self) {
+        if self.needs_notify {
+            panic!("NotifyBatch dropped without calling finish");
+        }
+    }
+}
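
`WaitType::to_wstatus` above produces the traditional wait(2) status layout: exit code in bits 8..15, a stopped child encoded as 0x7f in the low byte with the signal number above it, the terminating signal in the low 7 bits with 0x80 as the core-dump flag, and 0xffff for a continued child. A small round-trip check of that layout follows; the helper names are hypothetical, written only to mirror the encoder in the diff and the usual W* decoder macros.

// Encoders mirroring `WaitType::to_wstatus` above.
fn encode_exited(status: u32) -> u32 {
    (status & 0xff) << 8
}
fn encode_signaled(signum: u32, coredump: bool) -> u32 {
    if coredump { signum | 0x80 } else { signum }
}
fn encode_stopped(signum: u32) -> u32 {
    0x7f | (signum << 8)
}

// Decoders following the usual W* macro definitions.
fn wifexited(s: u32) -> bool { s & 0x7f == 0 }
fn wexitstatus(s: u32) -> u32 { (s >> 8) & 0xff }
fn wifstopped(s: u32) -> bool { s & 0xff == 0x7f }
fn wstopsig(s: u32) -> u32 { (s >> 8) & 0xff }
fn wifcontinued(s: u32) -> bool { s == 0xffff }
fn wifsignaled(s: u32) -> bool { !wifexited(s) && !wifstopped(s) && !wifcontinued(s) }
fn wtermsig(s: u32) -> u32 { s & 0x7f }
fn wcoredump(s: u32) -> bool { s & 0x80 != 0 }

fn main() {
    let exited = encode_exited(3);
    assert!(wifexited(exited) && wexitstatus(exited) == 3);

    let stopped = encode_stopped(19); // e.g. SIGSTOP on x86 Linux
    assert!(wifstopped(stopped) && wstopsig(stopped) == 19);

    let killed = encode_signaled(11, true); // e.g. SIGSEGV with a core dump
    assert!(wifsignaled(killed) && wtermsig(killed) == 11 && wcoredump(killed));

    assert!(wifcontinued(0xffff));
    println!("wstatus round-trip checks pass");
}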

+ 74 - 0
src/kernel/task/process_group.rs

@@ -0,0 +1,74 @@
+use alloc::{
+    collections::btree_map::BTreeMap,
+    sync::{Arc, Weak},
+};
+
+use crate::{
+    prelude::*,
+    sync::{RefMutPosition, RefPosition},
+};
+
+use super::{Process, ProcessList, Session, Signal};
+
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct ProcessGroup {
+    pub pgid: u32,
+    pub leader: Weak<Process>,
+    pub session: Weak<Session>,
+
+    pub processes: Locked<BTreeMap<u32, Weak<Process>>, ProcessList>,
+}
+
+impl ProcessGroup {
+    /// Don't use this function directly. Use `Session::new_group` instead.
+    pub(super) fn new(
+        leader: &Arc<Process>,
+        session: Weak<Session>,
+        procs: &mut ProcessList,
+    ) -> Arc<Self> {
+        let pgroup = Arc::new(Self {
+            pgid: leader.pid,
+            leader: Arc::downgrade(leader),
+            session,
+            processes: Locked::new(
+                BTreeMap::from([(leader.pid, Arc::downgrade(leader))]),
+                // SAFETY: `procs` must be the global process list, which won't be moved.
+                procs,
+            ),
+        });
+
+        procs.add_pgroup(&pgroup);
+        pgroup
+    }
+
+    pub(super) fn add_member(
+        &self,
+        process: &Arc<Process>,
+        procs: RefMutPosition<'_, ProcessList>,
+    ) {
+        assert!(self
+            .processes
+            .access_mut(procs)
+            .insert(process.pid, Arc::downgrade(process))
+            .is_none());
+    }
+
+    pub(super) fn remove_member(&self, pid: u32, procs: RefMutPosition<'_, ProcessList>) {
+        let processes = self.processes.access_mut(procs);
+        assert!(processes.remove(&pid).is_some());
+        if processes.is_empty() {
+            self.session
+                .upgrade()
+                .unwrap()
+                .remove_member(self.pgid, procs);
+        }
+    }
+
+    pub fn raise(&self, signal: Signal, procs: RefPosition<'_, ProcessList>) {
+        let processes = self.processes.access(procs);
+        for process in processes.values().map(|p| p.upgrade().unwrap()) {
+            process.raise(signal, procs);
+        }
+    }
+}

+ 217 - 0
src/kernel/task/process_list.rs

@@ -0,0 +1,217 @@
+use alloc::{
+    collections::btree_map::BTreeMap,
+    sync::{Arc, Weak},
+};
+
+use crate::{
+    prelude::*,
+    rcu::rcu_sync,
+    sync::{preempt, AsRefMutPosition as _, AsRefPosition as _},
+};
+
+use lazy_static::lazy_static;
+
+use super::{Process, ProcessGroup, Scheduler, Session, Signal, Thread, WaitObject, WaitType};
+
+pub struct ProcessList {
+    /// The init process.
+    init: Option<Arc<Process>>,
+    /// The kernel idle process.
+    idle: Option<Arc<Process>>,
+    /// All threads except the idle thread.
+    threads: BTreeMap<u32, Arc<Thread>>,
+    /// All processes except the idle process.
+    processes: BTreeMap<u32, Weak<Process>>,
+    /// All process groups.
+    pgroups: BTreeMap<u32, Weak<ProcessGroup>>,
+    /// All sessions.
+    sessions: BTreeMap<u32, Weak<Session>>,
+}
+
+lazy_static! {
+    static ref GLOBAL_PROC_LIST: RwSemaphore<ProcessList> = {
+        RwSemaphore::new(ProcessList {
+            init: None,
+            idle: None,
+            threads: BTreeMap::new(),
+            processes: BTreeMap::new(),
+            pgroups: BTreeMap::new(),
+            sessions: BTreeMap::new(),
+        })
+    };
+}
+
+impl ProcessList {
+    pub fn get() -> &'static RwSemaphore<Self> {
+        &GLOBAL_PROC_LIST
+    }
+
+    pub fn add_session(&mut self, session: &Arc<Session>) {
+        self.sessions.insert(session.sid, Arc::downgrade(session));
+    }
+
+    pub fn add_pgroup(&mut self, pgroup: &Arc<ProcessGroup>) {
+        self.pgroups.insert(pgroup.pgid, Arc::downgrade(pgroup));
+    }
+
+    pub fn add_process(&mut self, process: &Arc<Process>) {
+        self.processes.insert(process.pid, Arc::downgrade(process));
+    }
+
+    pub fn add_thread(&mut self, thread: &Arc<Thread>) {
+        self.threads.insert(thread.tid, thread.clone());
+    }
+
+    pub fn kill_current(signal: Signal) -> ! {
+        ProcessList::get()
+            .lock()
+            .do_kill_process(&Thread::current().process, WaitType::Signaled(signal));
+        Scheduler::schedule_noreturn()
+    }
+
+    pub fn remove_process(&mut self, pid: u32) {
+        // Thread group leader has the same tid as the pid.
+        if let Some(thread) = self.threads.remove(&pid) {
+            self.processes.remove(&pid);
+
+            // SAFETY: We wait until all references are dropped below with `rcu_sync()`.
+            let session = unsafe { thread.process.session.swap(None) }.unwrap();
+            let pgroup = unsafe { thread.process.pgroup.swap(None) }.unwrap();
+            let _parent = unsafe { thread.process.parent.swap(None) }.unwrap();
+            pgroup.remove_member(pid, self.as_pos_mut());
+            rcu_sync();
+
+            if Arc::strong_count(&pgroup) == 1 {
+                self.pgroups.remove(&pgroup.pgid);
+            }
+
+            if Arc::strong_count(&session) == 1 {
+                self.sessions.remove(&session.sid);
+            }
+        } else {
+            panic!("Process {} not found", pid);
+        }
+    }
+
+    pub fn init_process(&self) -> &Arc<Process> {
+        self.init.as_ref().unwrap()
+    }
+
+    pub fn idle_process(&self) -> &Arc<Process> {
+        self.idle.as_ref().unwrap()
+    }
+
+    pub fn try_find_thread(&self, tid: u32) -> Option<&Arc<Thread>> {
+        self.threads.get(&tid)
+    }
+
+    pub fn try_find_process(&self, pid: u32) -> Option<Arc<Process>> {
+        self.processes.get(&pid).and_then(Weak::upgrade)
+    }
+
+    pub fn try_find_pgroup(&self, pgid: u32) -> Option<Arc<ProcessGroup>> {
+        self.pgroups.get(&pgid).and_then(Weak::upgrade)
+    }
+
+    pub fn try_find_session(&self, sid: u32) -> Option<Arc<Session>> {
+        self.sessions.get(&sid).and_then(Weak::upgrade)
+    }
+
+    /// Make the process a zombie and notify the parent.
+    pub fn do_kill_process(&mut self, process: &Arc<Process>, status: WaitType) {
+        if self.idle_process().pid == process.pid {
+            panic!("idle exited");
+        }
+
+        if self.init_process().pid == process.pid {
+            panic!("init exited");
+        }
+
+        preempt::disable();
+
+        let inner = process.inner.access_mut(self.as_pos_mut());
+        // TODO!!!!!!: When we are killing multiple threads, we need to wait until all
+        // the threads are stopped and then proceed.
+        for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
+            assert!(thread.tid == Thread::current().tid);
+            Scheduler::get().lock().set_zombie(&thread);
+            thread.files.close_all();
+        }
+
+        // If we are the session leader, we should drop the control terminal.
+        if process.session(self.as_pos()).sid == process.pid {
+            if let Some(terminal) = process.session(self.as_pos()).drop_control_terminal() {
+                terminal.drop_session();
+            }
+        }
+
+        // Unmap all user memory areas
+        process.mm_list.clear_user();
+
+        // Make children orphans (adopted by init)
+        {
+            let init = self.init_process();
+            inner.children.retain(|_, child| {
+                let child = child.upgrade().unwrap();
+                // SAFETY: `child.parent` must be us, so we don't need to free it.
+                unsafe { child.parent.swap(Some(init.clone())) };
+                init.add_child(&child, self.as_pos_mut());
+
+                false
+            });
+        }
+
+        let mut init_notify = self.init_process().notify_batch();
+        process
+            .wait_list
+            .drain_exited()
+            .into_iter()
+            .for_each(|item| init_notify.notify(item));
+        init_notify.finish(self.as_pos());
+
+        process.parent(self.as_pos()).notify(
+            WaitObject {
+                pid: process.pid,
+                code: status,
+            },
+            self.as_pos(),
+        );
+
+        preempt::enable();
+    }
+}
+
+pub unsafe fn init_multitasking(init_fn: unsafe extern "C" fn()) {
+    let mut procs = ProcessList::get().lock();
+
+    let init_process = Process::new_for_init(1, procs.as_mut());
+    let init_thread = Thread::new_for_init(
+        Arc::from(b"[kernel kinit]".as_slice()),
+        1,
+        &init_process,
+        procs.as_mut(),
+    );
+
+    let init_session = Session::new(procs.as_mut(), &init_process);
+    let init_pgroup = init_session.new_group(procs.as_mut(), &init_process);
+
+    assert!(init_process.session.swap(Some(init_session)).is_none());
+    assert!(init_process.pgroup.swap(Some(init_pgroup)).is_none());
+
+    let idle_process = Process::new_for_init(0, procs.as_mut());
+    let idle_thread = Thread::new_for_init(
+        Arc::from(b"[kernel idle#BS]".as_slice()),
+        0,
+        &idle_process,
+        procs.as_mut(),
+    );
+
+    procs.init = Some(init_process);
+    procs.idle = Some(idle_process);
+
+    let mut scheduler = Scheduler::get().lock_irq();
+
+    init_thread.init(init_fn as usize);
+    scheduler.uwake(&init_thread);
+    Scheduler::set_idle_and_current(idle_thread);
+}
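
The `Locked<_, ProcessList>` fields and the `as_pos()` / `as_pos_mut()` tokens used throughout these new files are defined outside this diff. One plausible reading, based on how each `Locked` is constructed with the address of the global process list and only accessed while that lock is held, is an address-checked access token. The sketch below is an assumption about the mechanism, not code from the repository; all names are hypothetical.

use std::cell::UnsafeCell;

struct Locked<T, Owner> {
    data: UnsafeCell<T>,
    owner: *const Owner,
}

struct RefPosition<'a, Owner>(&'a Owner);

impl<T, Owner> Locked<T, Owner> {
    fn new(data: T, owner: *const Owner) -> Self {
        Self { data: UnsafeCell::new(data), owner }
    }

    // Hand out a reference only when the caller proves it holds the owner
    // this data was registered with, checked by address.
    fn access<'a>(&'a self, pos: RefPosition<'a, Owner>) -> &'a T {
        assert!(std::ptr::eq(pos.0, self.owner), "accessed with the wrong lock held");
        // In the kernel, the owner lock being held is what makes this sound.
        unsafe { &*self.data.get() }
    }
}

fn main() {
    let process_list = String::from("stand-in for the global ProcessList");
    let guarded = Locked::new(42u32, &process_list as *const String);

    let pos = RefPosition(&process_list); // "proof" that we hold the list's lock
    assert_eq!(*guarded.access(pos), 42);
    println!("ok: access checked against the owning lock's address");
}

The point of the pattern is that the data lives inside `Process`, `ProcessGroup`, and so on, but can only be touched while the `ProcessList` lock is held, which the API enforces by requiring the caller to present a position derived from that lock.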

+ 51 - 60
src/kernel/task/scheduler.rs

@@ -5,16 +5,13 @@ use core::{
 
 use crate::{prelude::*, sync::preempt};
 
-use alloc::{
-    collections::vec_deque::VecDeque,
-    sync::{Arc, Weak},
-};
+use alloc::{collections::vec_deque::VecDeque, sync::Arc};
 use lazy_static::lazy_static;
 
 use super::{Thread, ThreadState};
 
 pub struct Scheduler {
-    ready: VecDeque<Weak<Thread>>,
+    ready: VecDeque<Arc<Thread>>,
 }
 
 /// Idle task thread
@@ -24,7 +21,7 @@ static IDLE_TASK: Option<NonNull<Thread>> = None;
 
 /// Current thread
 #[arch::define_percpu]
-static mut CURRENT: Option<NonNull<Thread>> = None;
+static CURRENT: Option<NonNull<Thread>> = None;
 
 lazy_static! {
     static ref GLOBAL_SCHEDULER: Spin<Scheduler> = Spin::new(Scheduler {
@@ -58,26 +55,51 @@ impl Scheduler {
         BorrowedArc::from_raw(IDLE_TASK.get().unwrap().as_ptr())
     }
 
-    pub unsafe fn set_idle(thread: Arc<Thread>) {
+    pub unsafe fn set_idle_and_current(thread: Arc<Thread>) {
         // We don't wake the idle thread to prevent it from accidentally being scheduled there.
         thread.init(idle_task as *const () as usize);
+        assert_eq!(
+            thread.oncpu.swap(true, Ordering::AcqRel),
+            false,
+            "Idle task is already on cpu"
+        );
 
-        let old = IDLE_TASK.swap(NonNull::new(Arc::into_raw(thread) as *mut _));
+        let old = IDLE_TASK.swap(NonNull::new(Arc::into_raw(thread.clone()) as *mut _));
         assert!(old.is_none(), "Idle task is already set");
-    }
 
-    pub unsafe fn set_current(thread: Arc<Thread>) {
-        assert_eq!(thread.oncpu.swap(true, Ordering::AcqRel), false);
         let old = CURRENT.swap(NonNull::new(Arc::into_raw(thread) as *mut _));
+        assert!(old.is_none(), "Current is already set");
+    }
+
+    pub fn pop(&mut self) -> Option<Arc<Thread>> {
+        self.ready.pop_front()
+    }
+
+    pub unsafe fn swap_current(&mut self, next: Arc<Thread>) {
+        {
+            let mut next_state = next.state.lock();
+            assert_eq!(*next_state, ThreadState::Ready);
+            *next_state = ThreadState::Running;
+            assert_eq!(next.oncpu.swap(true, Ordering::AcqRel), false);
+        }
+
+        let old: Option<NonNull<Thread>> =
+            CURRENT.swap(NonNull::new(Arc::into_raw(next) as *mut _));
 
         if let Some(thread_pointer) = old {
             let thread = Arc::from_raw(thread_pointer.as_ptr());
-            thread.oncpu.store(false, Ordering::Release);
+            let mut state = thread.state.lock();
+            assert_eq!(thread.oncpu.swap(false, Ordering::AcqRel), true);
+
+            if let ThreadState::Running = *state {
+                *state = ThreadState::Ready;
+                self.enqueue(&thread);
+            }
         }
     }
 
     fn enqueue(&mut self, thread: &Arc<Thread>) {
-        self.ready.push_back(Arc::downgrade(thread));
+        self.ready.push_back(thread.clone());
     }
 
     pub fn usleep(&mut self, thread: &Arc<Thread>) {
@@ -125,24 +147,6 @@ impl Scheduler {
         }
     }
 
-    /// Put `Running` thread into `Ready` state and enqueue the task.
-    pub fn put_ready(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Running);
-
-        *state = ThreadState::Ready;
-        self.enqueue(&thread);
-    }
-
-    /// Set `Ready` threads to the `Running` state.
-    pub fn set_running(&mut self, thread: &Arc<Thread>) {
-        let mut state = thread.state.lock();
-        assert_eq!(*state, ThreadState::Ready);
-
-        *state = ThreadState::Running;
-        // No need to dequeue. We got the thread from the queue.
-    }
-
     /// Set `Running` threads to the `Zombie` state.
     pub fn set_zombie(&mut self, thread: &Arc<Thread>) {
         let mut state = thread.state.lock();
@@ -197,38 +201,25 @@ extern "C" fn idle_task() {
         let mut scheduler = Scheduler::get().lock_irq();
         let state = *Thread::current().state.lock();
 
-        // Previous thread is `Running`.
-        if let ThreadState::Running = state {
-            // No other thread to run, return to current running thread without changing its state.
-            if scheduler.ready.is_empty() {
+        // No other thread to run
+        match scheduler.pop() {
+            None => {
                 drop(scheduler);
-                context_switch_light(&Scheduler::idle_task(), &Thread::current());
+                if let ThreadState::Running = state {
+                    // Previous thread is `Running`, Return to current running thread
+                    // without changing its state.
+                    context_switch_light(&Scheduler::idle_task(), &Thread::current());
+                } else {
+                    // Halt the cpu and rerun the loop.
+                    arch::halt();
+                }
                 continue;
-            } else {
-                // Put it into `Ready` state
-                scheduler.put_ready(&Thread::current());
             }
-        }
-
-        // No thread to run, halt the cpu and rerun the loop.
-        if scheduler.ready.is_empty() {
-            drop(scheduler);
-            arch::halt();
-            continue;
-        }
-
-        let next_thread = scheduler
-            .ready
-            .pop_front()
-            .as_ref()
-            .map(|weak| weak.upgrade().unwrap())
-            .expect("We should have a thread to run");
-        scheduler.set_running(&next_thread);
-        drop(scheduler);
-
-        next_thread.process.mm_list.switch_page_table();
-        unsafe {
-            Scheduler::set_current(next_thread);
+            Some(next) => {
+                next.process.mm_list.switch_page_table();
+                unsafe { scheduler.swap_current(next) };
+                drop(scheduler);
+            }
         }
 
         unsafe {

+ 129 - 0
src/kernel/task/session.rs

@@ -0,0 +1,129 @@
+use alloc::{
+    collections::btree_map::BTreeMap,
+    sync::{Arc, Weak},
+};
+use bindings::EPERM;
+
+use crate::{
+    kernel::Terminal,
+    prelude::*,
+    sync::{AsRefMutPosition as _, AsRefPosition as _, RefMutPosition, RefPosition},
+};
+
+use super::{Process, ProcessGroup, ProcessList, Signal, Thread};
+
+#[derive(Debug)]
+struct SessionJobControl {
+    /// Foreground process group
+    foreground: Weak<ProcessGroup>,
+    control_terminal: Option<Arc<Terminal>>,
+}
+
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct Session {
+    pub sid: u32,
+    pub leader: Weak<Process>,
+    job_control: RwSemaphore<SessionJobControl>,
+
+    groups: Locked<BTreeMap<u32, Weak<ProcessGroup>>, ProcessList>,
+}
+
+impl Session {
+    /// Create a session and add it to the global session list.
+    pub(super) fn new(procs: &mut ProcessList, leader: &Arc<Process>) -> Arc<Self> {
+        let session = Arc::new(Self {
+            sid: leader.pid,
+            leader: Arc::downgrade(leader),
+            job_control: RwSemaphore::new(SessionJobControl {
+                foreground: Weak::new(),
+                control_terminal: None,
+            }),
+            groups: Locked::new(
+                BTreeMap::new(),
+                // SAFETY: `procs` must be the global process list, which won't be moved.
+                procs,
+            ),
+        });
+
+        procs.add_session(&session);
+        session
+    }
+
+    pub(super) fn new_group(
+        self: &Arc<Self>,
+        procs: &mut ProcessList,
+        leader: &Arc<Process>,
+    ) -> Arc<ProcessGroup> {
+        let pgroup = ProcessGroup::new(leader, Arc::downgrade(self), procs);
+        let groups = self.groups.access_mut(procs.as_pos_mut());
+        assert!(groups
+            .insert(pgroup.pgid, Arc::downgrade(&pgroup))
+            .is_none());
+
+        pgroup
+    }
+
+    pub(super) fn remove_member(&self, pgid: u32, procs: RefMutPosition<'_, ProcessList>) {
+        assert!(self.groups.access_mut(procs).remove(&pgid).is_some());
+    }
+
+    pub fn foreground(&self) -> Option<Arc<ProcessGroup>> {
+        self.job_control.lock_shared().foreground.upgrade()
+    }
+
+    /// Set the foreground process group identified by `pgid`.
+    /// The process group must belong to the session.
+    pub fn set_foreground_pgid(
+        &self,
+        pgid: u32,
+        procs: RefPosition<'_, ProcessList>,
+    ) -> KResult<()> {
+        if let Some(group) = self.groups.access(procs).get(&pgid) {
+            self.job_control.lock().foreground = group.clone();
+            Ok(())
+        } else {
+            // TODO: Check if the process group refers to an existing process group.
+            //       That's not a problem though, the operation will fail anyway.
+            Err(EPERM)
+        }
+    }
+
+    /// Only session leaders can set the control terminal.
+    /// Make sure we've checked that before calling this function.
+    pub fn set_control_terminal(
+        self: &Arc<Self>,
+        terminal: &Arc<Terminal>,
+        forced: bool,
+        procs: RefPosition<'_, ProcessList>,
+    ) -> KResult<()> {
+        let mut job_control = self.job_control.lock();
+        if let Some(_) = job_control.control_terminal.as_ref() {
+            if let Some(session) = terminal.session().as_ref() {
+                if session.sid == self.sid {
+                    return Ok(());
+                }
+            }
+            return Err(EPERM);
+        }
+        terminal.set_session(self, forced)?;
+        job_control.control_terminal = Some(terminal.clone());
+        job_control.foreground = Arc::downgrade(&Thread::current().process.pgroup(procs));
+        Ok(())
+    }
+
+    /// Drop the control terminal reference inside the session.
+    /// DO NOT TOUCH THE TERMINAL'S SESSION FIELD.
+    pub fn drop_control_terminal(&self) -> Option<Arc<Terminal>> {
+        let mut inner = self.job_control.lock();
+        inner.foreground = Weak::new();
+        inner.control_terminal.take()
+    }
+
+    pub fn raise_foreground(&self, signal: Signal) {
+        if let Some(fg) = self.foreground() {
+            let procs = ProcessList::get().lock_shared();
+            fg.raise(signal, procs.as_pos());
+        }
+    }
+}

+ 42 - 853
src/kernel/task/thread.rs

@@ -1,31 +1,22 @@
 use core::{
     arch::naked_asm,
     cell::{RefCell, UnsafeCell},
-    cmp,
-    sync::atomic::{self, AtomicBool, AtomicU32},
+    sync::atomic::AtomicBool,
 };
 
 use crate::{
-    kernel::{
-        cpu::current_cpu, mem::MMList, terminal::Terminal, user::dataflow::CheckedUserPointer,
-        vfs::FsContext,
-    },
+    kernel::{cpu::current_cpu, user::dataflow::CheckedUserPointer, vfs::FsContext},
     prelude::*,
-    sync::{preempt, CondVar, SpinGuard},
+    sync::{preempt, AsRefMutPosition as _, AsRefPosition as _},
 };
 
-use alloc::{
-    collections::{btree_map::BTreeMap, vec_deque::VecDeque},
-    sync::{Arc, Weak},
-};
-use bindings::{ECHILD, EINTR, EPERM, ESRCH};
-use lazy_static::lazy_static;
+use alloc::sync::Arc;
 
 use crate::kernel::vfs::filearray::FileArray;
 
 use super::{
     signal::{RaiseResult, Signal, SignalList},
-    KernelStack, Scheduler,
+    KernelStack, Process, ProcessList, Scheduler, WaitObject, WaitType,
 };
 
 use arch::{InterruptContext, TaskContext, UserTLS};
@@ -40,152 +31,6 @@ pub enum ThreadState {
     USleep,
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum WaitType {
-    Exited(u32),
-    Signaled(Signal),
-    Stopped(Signal),
-    Continued,
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct WaitObject {
-    pub pid: u32,
-    pub code: WaitType,
-}
-
-impl WaitType {
-    pub fn to_wstatus(self) -> u32 {
-        match self {
-            WaitType::Exited(status) => (status & 0xff) << 8,
-            WaitType::Signaled(signal) if signal.is_coredump() => signal.to_signum() | 0x80,
-            WaitType::Signaled(signal) => signal.to_signum(),
-            WaitType::Stopped(signal) => 0x7f | (signal.to_signum() << 8),
-            WaitType::Continued => 0xffff,
-        }
-    }
-}
-
-impl WaitObject {
-    pub fn stopped(&self) -> Option<Signal> {
-        if let WaitType::Stopped(signal) = self.code {
-            Some(signal)
-        } else {
-            None
-        }
-    }
-
-    pub fn is_continue(&self) -> bool {
-        matches!(self.code, WaitType::Continued)
-    }
-}
-
-#[derive(Debug)]
-struct SessionInner {
-    /// Foreground process group
-    foreground: Weak<ProcessGroup>,
-    control_terminal: Option<Arc<Terminal>>,
-    groups: BTreeMap<u32, Weak<ProcessGroup>>,
-}
-
-#[allow(dead_code)]
-#[derive(Debug)]
-pub struct Session {
-    sid: u32,
-    leader: Weak<Process>,
-
-    inner: Spin<SessionInner>,
-}
-
-#[allow(dead_code)]
-#[derive(Debug)]
-pub struct ProcessGroup {
-    pgid: u32,
-    leader: Weak<Process>,
-    session: Weak<Session>,
-
-    processes: Spin<BTreeMap<u32, Weak<Process>>>,
-}
-
-#[derive(Debug)]
-struct ProcessInner {
-    /// Parent process
-    ///
-    /// Parent process must be valid during the whole life of the process.
-    /// The only case that parent process may be `None` is when this is the init process
-    /// or the process is kernel thread.
-    parent: Option<Arc<Process>>,
-
-    /// Process group
-    pgroup: Arc<ProcessGroup>,
-
-    /// Session
-    session: Arc<Session>,
-
-    /// Children list
-    children: BTreeMap<u32, Weak<Thread>>,
-
-    /// Thread list
-    threads: BTreeMap<u32, Weak<Thread>>,
-}
-
-#[derive(Debug)]
-pub struct WaitList {
-    wait_procs: Spin<VecDeque<WaitObject>>,
-    cv_wait_procs: CondVar,
-    process: Weak<Process>,
-}
-
-pub struct NotifyBatch<'waitlist, 'cv, 'process> {
-    wait_procs: SpinGuard<'waitlist, VecDeque<WaitObject>>,
-    cv: &'cv CondVar,
-    process: &'process Weak<Process>,
-    needs_notify: bool,
-}
-
-pub struct Entry<'waitlist, 'cv> {
-    wait_procs: SpinGuard<'waitlist, VecDeque<WaitObject>>,
-    cv: &'cv CondVar,
-    want_stop: bool,
-    want_continue: bool,
-}
-
-pub struct DrainExited<'waitlist> {
-    wait_procs: SpinGuard<'waitlist, VecDeque<WaitObject>>,
-}
-
-#[derive(Debug)]
-pub struct Process {
-    /// Process id
-    ///
-    /// This should never change during the life of the process.
-    pub pid: u32,
-
-    pub wait_list: WaitList,
-    pub mm_list: Arc<MMList>,
-    inner: Spin<ProcessInner>,
-}
-
-impl PartialOrd for Process {
-    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
-        self.pid.partial_cmp(&other.pid)
-    }
-}
-
-impl Ord for Process {
-    fn cmp(&self, other: &Self) -> cmp::Ordering {
-        self.pid.cmp(&other.pid)
-    }
-}
-
-impl PartialEq for Process {
-    fn eq(&self, other: &Self) -> bool {
-        self.pid == other.pid
-    }
-}
-
-impl Eq for Process {}
-
 #[derive(Debug)]
 struct ThreadInner {
     /// Thread name
@@ -226,26 +71,6 @@ pub struct Thread {
     inner: Spin<ThreadInner>,
 }
 
-impl PartialOrd for Thread {
-    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
-        self.tid.partial_cmp(&other.tid)
-    }
-}
-
-impl Ord for Thread {
-    fn cmp(&self, other: &Self) -> cmp::Ordering {
-        self.tid.cmp(&other.tid)
-    }
-}
-
-impl PartialEq for Thread {
-    fn eq(&self, other: &Self) -> bool {
-        self.tid == other.tid
-    }
-}
-
-impl Eq for Thread {}
-
 #[repr(transparent)]
 #[derive(Debug, Clone, Copy)]
 pub struct UserDescriptorFlags(u32);
@@ -259,520 +84,6 @@ pub struct UserDescriptor {
     flags: UserDescriptorFlags,
 }
 
-pub struct ProcessList {
-    init: Arc<Process>,
-    threads: Spin<BTreeMap<u32, Arc<Thread>>>,
-    processes: Spin<BTreeMap<u32, Weak<Process>>>,
-    pgroups: Spin<BTreeMap<u32, Weak<ProcessGroup>>>,
-    sessions: Spin<BTreeMap<u32, Weak<Session>>>,
-}
-
-impl Session {
-    fn new(sid: u32, leader: Weak<Process>) -> Arc<Self> {
-        Arc::new(Self {
-            sid,
-            leader,
-            inner: Spin::new(SessionInner {
-                foreground: Weak::new(),
-                control_terminal: None,
-                groups: BTreeMap::new(),
-            }),
-        })
-    }
-
-    fn add_member(&self, pgroup: &Arc<ProcessGroup>) {
-        self.inner
-            .lock()
-            .groups
-            .insert(pgroup.pgid, Arc::downgrade(pgroup));
-    }
-
-    pub fn foreground_pgid(&self) -> Option<u32> {
-        self.inner.lock().foreground.upgrade().map(|fg| fg.pgid)
-    }
-
-    /// Set the foreground process group.
-    pub fn set_foreground_pgid(&self, pgid: u32) -> KResult<()> {
-        let mut inner = self.inner.lock();
-        let group = inner.groups.get(&pgid);
-
-        if let Some(group) = group {
-            inner.foreground = group.clone();
-            Ok(())
-        } else {
-            // TODO!!!: Check if the process group is valid.
-            //          We assume that the process group is valid for now.
-            Err(EPERM)
-        }
-    }
-
-    /// Only session leaders can set the control terminal.
-    /// Make sure we've checked that before calling this function.
-    pub fn set_control_terminal(
-        self: &Arc<Self>,
-        terminal: &Arc<Terminal>,
-        forced: bool,
-    ) -> KResult<()> {
-        let mut inner = self.inner.lock();
-        if let Some(_) = inner.control_terminal.as_ref() {
-            if let Some(session) = terminal.session().as_ref() {
-                if session.sid == self.sid {
-                    return Ok(());
-                }
-            }
-            return Err(EPERM);
-        }
-        terminal.set_session(self, forced)?;
-        inner.control_terminal = Some(terminal.clone());
-        inner.foreground = Arc::downgrade(&Thread::current().process.pgroup());
-        Ok(())
-    }
-
-    /// Drop the control terminal reference inside the session.
-    /// DO NOT TOUCH THE TERMINAL'S SESSION FIELD.
-    pub fn drop_control_terminal(&self) -> Option<Arc<Terminal>> {
-        let mut inner = self.inner.lock();
-        inner.foreground = Weak::new();
-        inner.control_terminal.take()
-    }
-
-    pub fn raise_foreground(&self, signal: Signal) {
-        if let Some(fg) = self.inner.lock().foreground.upgrade() {
-            fg.raise(signal);
-        }
-    }
-}
-
-impl ProcessGroup {
-    fn new_for_init(pgid: u32, leader: Weak<Process>, session: Weak<Session>) -> Arc<Self> {
-        Arc::new(Self {
-            pgid,
-            leader: leader.clone(),
-            session,
-            processes: Spin::new(BTreeMap::from([(pgid, leader)])),
-        })
-    }
-
-    fn new(leader: &Arc<Process>, session: &Arc<Session>) -> Arc<Self> {
-        let pgroup = Arc::new(Self {
-            pgid: leader.pid,
-            leader: Arc::downgrade(leader),
-            session: Arc::downgrade(session),
-            processes: Spin::new(BTreeMap::from([(leader.pid, Arc::downgrade(leader))])),
-        });
-
-        session.add_member(&pgroup);
-        pgroup
-    }
-}
-
-impl Drop for Thread {
-    fn drop(&mut self) {
-        let mut process = self.process.inner.lock();
-
-        process.threads.remove(&self.tid);
-        if let Some(parent) = &process.parent {
-            parent.inner.lock().children.remove(&self.tid);
-        }
-    }
-}
-
-impl Drop for Process {
-    fn drop(&mut self) {
-        let inner = self.inner.lock();
-        assert!(inner.children.is_empty());
-
-        inner.pgroup.processes.lock().remove(&self.pid);
-        ProcessList::get().processes.lock().remove(&self.pid);
-    }
-}
-
-impl Drop for ProcessGroup {
-    fn drop(&mut self) {
-        if let Some(session) = self.session.upgrade() {
-            session.inner.lock().groups.remove(&self.pgid);
-        }
-    }
-}
-
-lazy_static! {
-    static ref GLOBAL_PROC_LIST: ProcessList = unsafe {
-        let init_process = Process::new_for_init(1, None);
-        let init_thread = Thread::new_for_init(b"[kernel kinit]".as_slice().into(), &init_process);
-        Scheduler::set_current(init_thread.clone());
-
-        let idle_process = Process::new_for_init(0, None);
-        let idle_thread =
-            Thread::new_for_init(b"[kernel idle#BS]".as_slice().into(), &idle_process);
-        Scheduler::set_idle(idle_thread.clone());
-
-        let init_session_weak = Arc::downgrade(&init_process.inner.lock().session);
-        let init_pgroup_weak = Arc::downgrade(&init_process.inner.lock().pgroup);
-
-        ProcessList {
-            sessions: Spin::new(BTreeMap::from([(1, init_session_weak)])),
-            pgroups: Spin::new(BTreeMap::from([(1, init_pgroup_weak)])),
-            threads: Spin::new(BTreeMap::from([
-                (1, init_thread.clone()),
-                (0, idle_thread.clone()),
-            ])),
-            processes: Spin::new(BTreeMap::from([
-                (1, Arc::downgrade(&init_process)),
-                (0, Arc::downgrade(&idle_process)),
-            ])),
-            init: init_process,
-        }
-    };
-}
-impl ProcessList {
-    pub fn get() -> &'static Self {
-        &GLOBAL_PROC_LIST
-    }
-
-    pub fn add_session(&self, session: &Arc<Session>) {
-        self.sessions
-            .lock()
-            .insert(session.sid, Arc::downgrade(session));
-    }
-
-    pub fn add_pgroup(&self, pgroup: &Arc<ProcessGroup>) {
-        self.pgroups
-            .lock()
-            .insert(pgroup.pgid, Arc::downgrade(pgroup));
-    }
-
-    pub fn add_process(&self, process: &Arc<Process>) {
-        self.processes
-            .lock()
-            .insert(process.pid, Arc::downgrade(process));
-    }
-
-    pub fn add_thread(&self, thread: &Arc<Thread>) {
-        self.threads.lock().insert(thread.tid, thread.clone());
-    }
-
-    pub fn kill_current(signal: Signal) -> ! {
-        ProcessList::get().do_kill_process(&Thread::current().process, WaitType::Signaled(signal));
-        Scheduler::schedule_noreturn()
-    }
-
-    // TODO!!!!!!: Reconsider this
-    fn remove(&self, tid: u32) {
-        if let None = self.threads.lock().remove(&tid) {
-            panic!("Thread {} not found", tid);
-        }
-    }
-
-    pub fn try_find_process(&self, pid: u32) -> Option<Arc<Process>> {
-        self.processes.lock().get(&pid).and_then(Weak::upgrade)
-    }
-
-    pub fn try_find_thread(&self, tid: u32) -> Option<Arc<Thread>> {
-        self.threads.lock().get(&tid).cloned()
-    }
-
-    pub fn try_find_pgroup(&self, pgid: u32) -> Option<Arc<ProcessGroup>> {
-        self.pgroups.lock().get(&pgid).and_then(Weak::upgrade)
-    }
-
-    pub fn try_find_session(&self, sid: u32) -> Option<Arc<Session>> {
-        self.sessions.lock().get(&sid).and_then(Weak::upgrade)
-    }
-
-    /// Make the process a zombie and notify the parent.
-    pub fn do_kill_process(&self, process: &Arc<Process>, status: WaitType) {
-        if &self.init == process {
-            panic!("init exited");
-        }
-
-        preempt::disable();
-
-        let mut inner = process.inner.lock();
-        // TODO!!!!!!: When we are killing multiple threads, we need to wait until all
-        // the threads are stopped then proceed.
-        for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
-            assert!(&thread == Thread::current().as_ref());
-            Scheduler::get().lock().set_zombie(&thread);
-            thread.files.close_all();
-        }
-
-        // If we are the session leader, we should drop the control terminal.
-        if inner.session.sid == process.pid {
-            if let Some(terminal) = inner.session.drop_control_terminal() {
-                terminal.drop_session();
-            }
-        }
-
-        // Unmap all user memory areas
-        process.mm_list.clear_user();
-
-        // Make children orphans (adopted by init)
-        {
-            let mut init_inner = self.init.inner.lock();
-
-            inner.children.retain(|_, child| {
-                let child = child.upgrade().unwrap();
-                child.process.inner.lock().parent = Some(self.init.clone());
-                init_inner.add_child(&child);
-
-                false
-            });
-        }
-
-        let mut init_notify = self.init.wait_list.notify_batch();
-        process
-            .wait_list
-            .drain_exited()
-            .into_iter()
-            .for_each(|item| init_notify.notify(item));
-        init_notify.finish();
-
-        inner.parent.as_ref().unwrap().wait_list.notify(WaitObject {
-            pid: process.pid,
-            code: status,
-        });
-
-        preempt::enable();
-    }
-}
-
-impl ProcessGroup {
-    fn add_member(&self, process: &Arc<Process>) {
-        self.processes
-            .lock()
-            .insert(process.pid, Arc::downgrade(process));
-    }
-
-    fn remove_member(&self, pid: u32) {
-        self.processes.lock().remove(&pid);
-    }
-
-    pub fn raise(&self, signal: Signal) {
-        let processes = self.processes.lock();
-        for process in processes.values().map(|p| p.upgrade().unwrap()) {
-            process.raise(signal);
-        }
-    }
-}
-
-impl ProcessInner {
-    fn add_child(&mut self, child: &Arc<Thread>) {
-        self.children.insert(child.tid, Arc::downgrade(child));
-    }
-
-    fn add_thread(&mut self, thread: &Arc<Thread>) {
-        self.threads.insert(thread.tid, Arc::downgrade(thread));
-    }
-}
-
-/// PIDs 0 and 1 are created manually, so we start from 2.
-static NEXT_PID: AtomicU32 = AtomicU32::new(2);
-impl Process {
-    fn alloc_pid() -> u32 {
-        NEXT_PID.fetch_add(1, atomic::Ordering::Relaxed)
-    }
-
-    pub fn new_cloned(other: &Arc<Self>) -> Arc<Self> {
-        let other_inner = other.inner.lock();
-
-        let process = Arc::new_cyclic(|weak| Self {
-            pid: Self::alloc_pid(),
-            wait_list: WaitList::new(weak.clone()),
-            mm_list: MMList::new_cloned(&other.mm_list),
-            inner: Spin::new(ProcessInner {
-                pgroup: other_inner.pgroup.clone(),
-                session: other_inner.session.clone(),
-                children: BTreeMap::new(),
-                threads: BTreeMap::new(),
-                parent: Some(other.clone()),
-            }),
-        });
-
-        ProcessList::get().add_process(&process);
-        other_inner.pgroup.add_member(&process);
-        process
-    }
-
-    unsafe fn new_for_init(pid: u32, parent: Option<Arc<Self>>) -> Arc<Self> {
-        let process = Arc::new_cyclic(|weak| {
-            let session = Session::new(pid, weak.clone());
-            let pgroup = ProcessGroup::new_for_init(pid, weak.clone(), Arc::downgrade(&session));
-
-            session.add_member(&pgroup);
-            Self {
-                pid,
-                wait_list: WaitList::new(weak.clone()),
-                mm_list: MMList::new(),
-                inner: Spin::new(ProcessInner {
-                    parent,
-                    pgroup,
-                    session,
-                    children: BTreeMap::new(),
-                    threads: BTreeMap::new(),
-                }),
-            }
-        });
-
-        process.inner.lock().pgroup.add_member(&process);
-        process
-    }
-
-    pub fn raise(&self, signal: Signal) {
-        let inner = self.inner.lock();
-        for thread in inner.threads.values().map(|t| t.upgrade().unwrap()) {
-            if let RaiseResult::Finished = thread.raise(signal) {
-                break;
-            }
-        }
-    }
-
-    fn add_child(&self, child: &Arc<Thread>) {
-        self.inner.lock().add_child(child);
-    }
-
-    fn add_thread(&self, thread: &Arc<Thread>) {
-        self.inner.lock().add_thread(thread);
-    }
-
-    pub fn wait(
-        &self,
-        no_block: bool,
-        trace_stop: bool,
-        trace_continue: bool,
-    ) -> KResult<Option<WaitObject>> {
-        let mut waits = self.wait_list.entry(trace_stop, trace_continue);
-        let wait_object = loop {
-            if let Some(object) = waits.get() {
-                break object;
-            }
-
-            if self.inner.lock().children.is_empty() {
-                return Err(ECHILD);
-            }
-
-            if no_block {
-                return Ok(None);
-            }
-
-            waits.wait()?;
-        };
-
-        if wait_object.stopped().is_some() || wait_object.is_continue() {
-            Ok(Some(wait_object))
-        } else {
-            ProcessList::get().remove(wait_object.pid);
-            Ok(Some(wait_object))
-        }
-    }
-
-    /// Create a new session for the process.
-    pub fn setsid(self: &Arc<Self>) -> KResult<u32> {
-        let mut inner = self.inner.lock();
-        // If there exists a session that has the same sid as our pid, we can't create a new
-        // session. The standard says that we should create a new process group and be the
-        // only process in the new process group and session.
-        if ProcessList::get().try_find_session(self.pid).is_some() {
-            return Err(EPERM);
-        }
-        inner.session = Session::new(self.pid, Arc::downgrade(self));
-        ProcessList::get().add_session(&inner.session);
-
-        inner.pgroup.remove_member(self.pid);
-        inner.pgroup = ProcessGroup::new(self, &inner.session);
-        ProcessList::get().add_pgroup(&inner.pgroup);
-
-        Ok(inner.pgroup.pgid)
-    }
-
-    /// Set the process group id of the process to `pgid`.
-    ///
-    /// This function does the actual work.
-    fn do_setpgid(self: &Arc<Self>, pgid: u32) -> KResult<()> {
-        let mut inner = self.inner.lock();
-
-        // Changing the process group of a session leader is not allowed.
-        if inner.session.sid == self.pid {
-            return Err(EPERM);
-        }
-
-        // Move us to an existing process group.
-        if let Some(pgroup) = ProcessList::get().try_find_pgroup(pgid) {
-            // Moving the process to a process group in a different session is not allowed.
-            if pgroup.session.upgrade().unwrap().sid != inner.session.sid {
-                return Err(EPERM);
-            }
-
-            // If we are already in the process group, we are done.
-            if pgroup.pgid == inner.pgroup.pgid {
-                return Ok(());
-            }
-
-            inner.pgroup.remove_member(self.pid);
-            inner.pgroup = pgroup;
-        } else {
-            // Create a new process group only if `pgid` matches our `pid`.
-            if pgid != self.pid {
-                return Err(EPERM);
-            }
-
-            inner.pgroup.remove_member(self.pid);
-            inner.pgroup = ProcessGroup::new(self, &inner.session);
-            ProcessList::get().add_pgroup(&inner.pgroup);
-        }
-
-        Ok(())
-    }
-
-    /// Set the process group id of the process `pid` to `pgid`.
-    ///
-    /// This function should be called on the process that issued the syscall in order to do
-    /// permission checks.
-    pub fn setpgid(self: &Arc<Self>, pid: u32, pgid: u32) -> KResult<()> {
-        // We may set pgid of either the calling process or a child process.
-        if pid == self.pid {
-            self.do_setpgid(pgid)
-        } else {
-            let child = {
-                // If `pid` refers to one of our children, the thread leaders must be
-                // in our children list.
-                let inner = self.inner.lock();
-                let child = {
-                    let child = inner.children.get(&pid);
-                    child.and_then(Weak::upgrade).ok_or(ESRCH)?
-                };
-
-                // Changing the process group of a child is only allowed
-                // if we are in the same session.
-                if child.process.sid() != inner.session.sid {
-                    return Err(EPERM);
-                }
-
-                child
-            };
-
-            // TODO: Check whether we, as a child, have already performed an `execve`.
-            //       If so, we should return `Err(EACCES)`.
-            child.process.do_setpgid(pgid)
-        }
-    }
-
-    pub fn sid(&self) -> u32 {
-        self.inner.lock().session.sid
-    }
-
-    pub fn pgid(&self) -> u32 {
-        self.inner.lock().pgroup.pgid
-    }
-
-    pub fn session(&self) -> Arc<Session> {
-        self.inner.lock().session.clone()
-    }
-
-    pub fn pgroup(&self) -> Arc<ProcessGroup> {
-        self.inner.lock().pgroup.clone()
-    }
-}
-
 #[allow(dead_code)]
 impl UserDescriptorFlags {
     fn is_32bit_segment(&self) -> bool {
@@ -801,9 +112,14 @@ impl UserDescriptorFlags {
 }
 
 impl Thread {
-    pub unsafe fn new_for_init(name: Arc<[u8]>, process: &Arc<Process>) -> Arc<Self> {
+    pub unsafe fn new_for_init(
+        name: Arc<[u8]>,
+        tid: u32,
+        process: &Arc<Process>,
+        procs: &mut ProcessList,
+    ) -> Arc<Self> {
         let thread = Arc::new(Self {
-            tid: process.pid,
+            tid,
             process: process.clone(),
             files: FileArray::new_for_init(),
             fs_context: FsContext::new_for_init(),
@@ -819,40 +135,39 @@ impl Thread {
             }),
         });
 
-        process.add_thread(&thread);
+        process.add_thread(&thread, procs.as_pos_mut());
         thread
     }
 
-    pub fn new_cloned(other: &Self) -> Arc<Self> {
-        let process = Process::new_cloned(&other.process);
+    pub fn new_cloned(&self, procs: &mut ProcessList) -> Arc<Self> {
+        let process = Process::new_cloned(&self.process, procs);
 
-        let other_state = other.state.lock();
-        let other_inner = other.inner.lock();
-        assert!(matches!(*other_state, ThreadState::Running));
+        let state = self.state.lock();
+        let inner = self.inner.lock();
+        assert!(matches!(*state, ThreadState::Running));
 
-        let signal_list = other.signal_list.clone();
+        let signal_list = self.signal_list.clone();
         signal_list.clear_pending();
 
         let thread = Arc::new(Self {
             tid: process.pid,
             process: process.clone(),
-            files: FileArray::new_cloned(&other.files),
-            fs_context: FsContext::new_cloned(&other.fs_context),
+            files: FileArray::new_cloned(&self.files),
+            fs_context: FsContext::new_cloned(&self.fs_context),
             signal_list,
             kstack: RefCell::new(KernelStack::new()),
             context: UnsafeCell::new(TaskContext::new()),
             state: Spin::new(ThreadState::Preparing),
             oncpu: AtomicBool::new(false),
             inner: Spin::new(ThreadInner {
-                name: other_inner.name.clone(),
-                tls: other_inner.tls.clone(),
-                set_child_tid: other_inner.set_child_tid,
+                name: inner.name.clone(),
+                tls: inner.tls.clone(),
+                set_child_tid: inner.set_child_tid,
             }),
         });
 
-        ProcessList::get().add_thread(&thread);
-        other.process.add_child(&thread);
-        process.add_thread(&thread);
+        procs.add_thread(&thread);
+        process.add_thread(&thread, procs.as_pos_mut());
         thread
     }
 
@@ -861,11 +176,14 @@ impl Thread {
     }
 
     pub fn do_stop(self: &Arc<Self>, signal: Signal) {
-        if let Some(parent) = self.process.parent() {
-            parent.wait_list.notify(WaitObject {
-                pid: self.process.pid,
-                code: WaitType::Stopped(signal),
-            });
+        if let Some(parent) = self.process.parent.load() {
+            parent.notify(
+                WaitObject {
+                    pid: self.process.pid,
+                    code: WaitType::Stopped(signal),
+                },
+                ProcessList::get().lock_shared().as_pos(),
+            );
         }
 
         preempt::disable();
@@ -877,11 +195,14 @@ impl Thread {
     }
 
     pub fn do_continue(self: &Arc<Self>) {
-        if let Some(parent) = self.process.parent() {
-            parent.wait_list.notify(WaitObject {
-                pid: self.process.pid,
-                code: WaitType::Continued,
-            });
+        if let Some(parent) = self.process.parent.load() {
+            parent.notify(
+                WaitObject {
+                    pid: self.process.pid,
+                    code: WaitType::Continued,
+                },
+                ProcessList::get().lock_shared().as_pos(),
+            );
         }
     }
 
@@ -1015,135 +336,3 @@ unsafe extern "C" fn fork_return() {
 
 // TODO: Maybe we can find a better way instead of using `RefCell` for `KernelStack`?
 unsafe impl Sync for Thread {}
-
-impl WaitList {
-    pub fn new(process: Weak<Process>) -> Self {
-        Self {
-            wait_procs: Spin::new(VecDeque::new()),
-            cv_wait_procs: CondVar::new(),
-            process,
-        }
-    }
-
-    pub fn notify(&self, wait: WaitObject) {
-        let mut wait_procs = self.wait_procs.lock();
-        wait_procs.push_back(wait);
-        self.cv_wait_procs.notify_all();
-
-        self.process
-            .upgrade()
-            .expect("`process` must be valid if we are using `WaitList`")
-            .raise(Signal::SIGCHLD);
-    }
-
-    /// Notify some processes in batch. The waiting process is woken up only if we have
-    /// actually notified some processes.
-    ///
-    /// # Lock
-    /// This function locks the `wait_procs` and returns a `NotifyBatch` that
-    /// will unlock it when dropped.
-    pub fn notify_batch(&self) -> NotifyBatch {
-        NotifyBatch {
-            wait_procs: self.wait_procs.lock(),
-            cv: &self.cv_wait_procs,
-            needs_notify: false,
-            process: &self.process,
-        }
-    }
-
-    pub fn drain_exited(&self) -> DrainExited {
-        DrainExited {
-            wait_procs: self.wait_procs.lock(),
-        }
-    }
-
-    pub fn entry(&self, want_stop: bool, want_continue: bool) -> Entry {
-        Entry {
-            wait_procs: self.wait_procs.lock(),
-            cv: &self.cv_wait_procs,
-            want_stop,
-            want_continue,
-        }
-    }
-}
-
-impl Entry<'_, '_> {
-    pub fn get(&mut self) -> Option<WaitObject> {
-        if let Some(idx) = self
-            .wait_procs
-            .iter()
-            .enumerate()
-            .filter(|(_, item)| {
-                if item.stopped().is_some() {
-                    self.want_stop
-                } else if item.is_continue() {
-                    self.want_continue
-                } else {
-                    true
-                }
-            })
-            .map(|(idx, _)| idx)
-            .next()
-        {
-            Some(self.wait_procs.remove(idx).unwrap())
-        } else {
-            None
-        }
-    }
-
-    pub fn wait(&mut self) -> KResult<()> {
-        self.cv.wait(&mut self.wait_procs);
-        if Thread::current().signal_list.has_pending_signal() {
-            return Err(EINTR);
-        }
-        Ok(())
-    }
-}
-
-impl DrainExited<'_> {
-    pub fn into_iter(&mut self) -> impl Iterator<Item = WaitObject> + '_ {
-        // We don't propagate stop and continue to the new parent.
-        self.wait_procs
-            .drain(..)
-            .filter(|item| item.stopped().is_none() && !item.is_continue())
-    }
-}
-
-impl NotifyBatch<'_, '_, '_> {
-    pub fn notify(&mut self, wait: WaitObject) {
-        self.wait_procs.push_back(wait);
-    }
-
-    /// Finish the batch and notify all if we have notified some processes.
-    pub fn finish(self) {}
-}
-
-impl Drop for NotifyBatch<'_, '_, '_> {
-    fn drop(&mut self) {
-        if self.needs_notify {
-            self.cv.notify_all();
-
-            self.process
-                .upgrade()
-                .expect("`process` must be valid if we are using `WaitList`")
-                .raise(Signal::SIGCHLD);
-        }
-    }
-}
-
-impl Process {
-    pub fn parent(&self) -> Option<Arc<Process>> {
-        self.inner.lock().parent.clone()
-    }
-}
-
-pub fn init_multitasking() {
-    // Lazy init
-    assert!(ProcessList::get().try_find_thread(1).is_some());
-
-    unsafe {
-        // SAFETY: Preemption is disabled outside this function.
-        Thread::current().load_interrupt_stack();
-    }
-    Thread::current().process.mm_list.switch_page_table();
-}
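The constructors above now take the process list explicitly instead of reaching for a global, so all bookkeeping happens under one witnessed lock. A rough sketch of what a fork-style caller might look like under the new signatures; `ProcessList::get().lock()` returning an exclusive guard is an assumption inferred from the `lock_shared()` calls later in this diff, not confirmed code:

// Hypothetical caller: clone the current thread while holding the global
// process list exclusively, then hand the child to the scheduler.
let current = Thread::current();
let child = {
    let mut procs = ProcessList::get().lock();
    current.new_cloned(&mut procs)
};
Scheduler::get().lock().uwake(&child);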

+ 11 - 6
src/kernel/terminal.rs

@@ -5,10 +5,14 @@ use alloc::{
 use bindings::{EINTR, ENOTTY, EPERM};
 use bitflags::bitflags;
 
-use crate::{io::Buffer, prelude::*, sync::CondVar};
+use crate::{
+    io::Buffer,
+    prelude::*,
+    sync::{AsRefPosition as _, CondVar},
+};
 
 use super::{
-    task::{Session, Signal, Thread},
+    task::{ProcessList, Session, Signal, Thread},
     user::{UserPointer, UserPointerMut},
 };
 
@@ -588,10 +592,10 @@ impl Terminal {
         match request {
             TerminalIORequest::GetProcessGroup(pgid_pointer) => {
                 let session = self.inner.lock().session.upgrade();
-                let pgid = session.map(|session| session.foreground_pgid()).flatten();
+                let pgroup = session.map(|session| session.foreground()).flatten();
 
-                if let Some(pgid) = pgid {
-                    pgid_pointer.write(pgid)
+                if let Some(pgroup) = pgroup {
+                    pgid_pointer.write(pgroup.pgid)
                 } else {
                     Err(ENOTTY)
                 }
@@ -599,11 +603,12 @@ impl Terminal {
             TerminalIORequest::SetProcessGroup(pgid) => {
                 let pgid = pgid.read()?;
 
+                let procs = ProcessList::get().lock_shared();
                 let inner = self.inner.lock();
                 let session = inner.session.upgrade();
 
                 if let Some(session) = session {
-                    session.set_foreground_pgid(pgid)
+                    session.set_foreground_pgid(pgid, procs.as_pos())
                 } else {
                     Err(ENOTTY)
                 }
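The request handler now works with the process group object itself rather than a bare pgid. A minimal sketch of the `Session::foreground()` accessor these call sites assume, built on the `foreground: Weak<ProcessGroup>` field shown in the code removed earlier in this diff (the exact signature is not confirmed here):

impl Session {
    /// Return the foreground process group, if one is still alive.
    pub fn foreground(&self) -> Option<Arc<ProcessGroup>> {
        self.inner.lock().foreground.upgrade()
    }
}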

+ 5 - 2
src/kernel/vfs/inode.rs

@@ -5,7 +5,10 @@ use bindings::{
     S_IFMT,
 };
 use core::{
-    mem::MaybeUninit, ops::ControlFlow, ptr::addr_of_mut, sync::atomic::{AtomicU32, AtomicU64, Ordering}
+    mem::MaybeUninit,
+    ops::ControlFlow,
+    ptr::addr_of_mut,
+    sync::atomic::{AtomicU32, AtomicU64, Ordering},
 };
 
 use super::{dentry::Dentry, s_isblk, s_ischr, vfs::Vfs, DevId, TimeSpec};
@@ -241,7 +244,7 @@ pub trait Inode: Send + Sync + InodeInner {
 
         f(
             uninit_mut.as_mut_ptr(),
-            // Safety: `idata` is initialized
+            // SAFETY: `idata` is initialized and we will never move the lock.
             &unsafe { idata.assume_init_ref() }.rwsem.lock_shared(),
         );
 

+ 7 - 5
src/kinit.cpp

@@ -48,6 +48,8 @@ static inline void setup_early_kernel_page_table() {
 }
 
 extern "C" char KIMAGE_PAGES[];
+extern "C" void create_pages(uintptr_t start, uintptr_t end);
+extern "C" void mark_present(uintptr_t start, uintptr_t end);
 
 static inline void setup_buddy(uintptr_t addr_max) {
     using namespace kernel::mem;
@@ -95,13 +97,13 @@ static inline void setup_buddy(uintptr_t addr_max) {
         if (start > end)
             continue;
 
-        mem::paging::create_zone(start, end);
+        create_pages(start, end);
     }
 
     // unused space
-    create_zone(0x9000, 0x80000);
-    create_zone(0x100000, 0x200000);
-    create_zone(real_start_pfn, saved_start_pfn);
+    create_pages(0x9000, 0x80000);
+    create_pages(0x100000, 0x200000);
+    create_pages(real_start_pfn, saved_start_pfn);
 }
 
 static inline void save_memory_info(bootloader_data* data) {
@@ -133,7 +135,7 @@ extern "C" void NORETURN kernel_init(bootloader_data* data) {
     init_allocator();
 
     using namespace mem::paging;
-    auto kernel_stack_pfn = page_to_pfn(alloc_pages(9));
+    auto kernel_stack_pfn = page_to_pfn(c_alloc_pages(9)) << 12;
     auto kernel_stack_ptr = mem::physaddr<std::byte>{kernel_stack_pfn} + (1 << 9) * 0x1000;
 
     asm volatile(

+ 2 - 5
src/lib.rs

@@ -16,6 +16,7 @@ mod driver;
 mod elf;
 mod fs;
 mod hash;
+mod intrusive_list;
 mod io;
 mod kernel;
 mod net;
@@ -125,11 +126,7 @@ pub extern "C" fn rust_kinit(early_kstack_pfn: usize) -> ! {
 
     // We need root dentry to be present in constructor of `FsContext`.
     // So call `init_vfs` first, then `init_multitasking`.
-    init_multitasking();
-
-    Thread::current().init(init_process as usize);
-
-    Scheduler::get().lock().uwake(&Thread::current());
+    unsafe { init_multitasking(init_process) };
 
     let mut unuse_ctx = arch::TaskContext::new();
     // TODO: Temporary solution: we will never access this later on.
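The three calls removed here are folded into a single `unsafe fn init_multitasking(entry)`. A rough sketch of what the consolidated function plausibly does, reassembled from the bodies deleted from the task module above; its real signature and internals are assumptions:

pub unsafe fn init_multitasking(entry: unsafe extern "C" fn() -> !) {
    let thread = Thread::current();
    // SAFETY: preemption is still disabled during early kernel init.
    thread.load_interrupt_stack();
    thread.process.mm_list.switch_page_table();
    // Point the init thread at its entry function and make it runnable.
    thread.init(entry as usize);
    Scheduler::get().lock().uwake(&thread);
}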

+ 39 - 3
src/rcu.rs

@@ -1,5 +1,6 @@
 use core::{
     ops::Deref,
+    ptr::NonNull,
     sync::atomic::{AtomicPtr, Ordering},
 };
 
@@ -40,7 +41,7 @@ impl<'data, T: 'data> Deref for RCUReadGuard<'data, T> {
     }
 }
 
-fn rcu_sync() {
+pub fn rcu_sync() {
     GLOBAL_RCU_SEM.lock();
 }
 
@@ -184,6 +185,19 @@ impl<'lt, T: RCUNode<T>> Iterator for RCUIterator<'lt, T> {
 
 pub struct RCUPointer<T>(AtomicPtr<T>);
 
+impl<T: core::fmt::Debug> core::fmt::Debug for RCUPointer<T> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match NonNull::new(self.0.load(Ordering::Acquire)) {
+            Some(pointer) => {
+                let borrowed = BorrowedArc::from_raw(pointer.as_ptr());
+                f.write_str("RCUPointer of ")?;
+                borrowed.fmt(f)
+            }
+            None => f.debug_tuple("NULL RCUPointer").finish(),
+        }
+    }
+}
+
 impl<T> RCUPointer<T> {
     pub fn new_with(value: Arc<T>) -> Self {
         Self(AtomicPtr::new(Arc::into_raw(value) as *mut _))
@@ -204,7 +218,18 @@ impl<T> RCUPointer<T> {
     }
 
     /// # Safety
-    /// Caller must ensure that the pointer is freed after all readers are done.
+    /// Caller must ensure no writers are updating the pointer.
+    pub unsafe fn load_locked<'lt>(&self) -> Option<BorrowedArc<'lt, T>> {
+        let ptr = self.0.load(Ordering::Acquire);
+        if ptr.is_null() {
+            None
+        } else {
+            Some(BorrowedArc::from_raw(ptr))
+        }
+    }
+
+    /// # Safety
+    /// Caller must ensure that the actual pointer is freed after all readers are done.
     pub unsafe fn swap(&self, new: Option<Arc<T>>) -> Option<Arc<T>> {
         let new = new
             .map(|arc| Arc::into_raw(arc) as *mut T)
@@ -215,8 +240,19 @@ impl<T> RCUPointer<T> {
         if old.is_null() {
             None
         } else {
-            rcu_sync();
             Some(unsafe { Arc::from_raw(old) })
         }
     }
 }
+
+impl<T> Drop for RCUPointer<T> {
+    fn drop(&mut self) {
+        // SAFETY: We call `rcu_sync()` to ensure that all readers are done.
+        if let Some(arc) = unsafe { self.swap(None) } {
+            // Only wait if this was the last strong reference: the value is freed once `arc` drops.
+            if Arc::strong_count(&arc) == 1 {
+                rcu_sync();
+            }
+        }
+    }
+}
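With the `rcu_sync()` call removed from `swap()`, waiting for readers is now the caller's (or `Drop`'s) responsibility. A minimal writer-side sketch using the API from this file; the `Config` payload type is made up for illustration:

struct Config {
    timeout_ms: u32,
}

fn update_config(ptr: &RCUPointer<Config>, new: Arc<Config>) {
    // SAFETY: single writer assumed, and we call `rcu_sync()` below before
    // the old value can be freed.
    let old = unsafe { ptr.swap(Some(new)) };
    if let Some(old) = old {
        rcu_sync(); // wait until all in-flight readers are done with `old`
        drop(old);  // dropping the last strong reference may now free it
    }
}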

+ 2 - 31
src/sync.rs

@@ -1,5 +1,6 @@
 mod condvar;
 pub mod lock;
+mod locked;
 pub mod semaphore;
 pub mod spin;
 pub mod strategy;
@@ -67,35 +68,6 @@ pub type RwSemWriteGuard<'lock, T> = lock::Guard<'lock, T, semaphore::RwSemaphor
 pub type CondVar = condvar::CondVar<true>;
 pub type UCondVar = condvar::CondVar<false>;
 
-pub struct Locked<T: Sized, U: ?Sized> {
-    inner: UnsafeCell<T>,
-    guard: *const U,
-}
-
-unsafe impl<T: Sized + Send, U: ?Sized> Send for Locked<T, U> {}
-unsafe impl<T: Sized + Send + Sync, U: ?Sized> Sync for Locked<T, U> {}
-
-impl<T: Sized + Sync, U: ?Sized> Locked<T, U> {
-    pub fn new(value: T, from: &U) -> Self {
-        Self {
-            inner: UnsafeCell::new(value),
-            guard: from,
-        }
-    }
-
-    pub fn access<'lt>(&'lt self, guard: &'lt U) -> &'lt T {
-        assert_eq!(self.guard, guard as *const U, "wrong guard");
-        // SAFETY: The guard protects the shared access to the inner value.
-        unsafe { self.inner.get().as_ref() }.unwrap()
-    }
-
-    pub fn access_mut<'lt>(&'lt self, guard: &'lt mut U) -> &'lt mut T {
-        assert_eq!(self.guard, guard as *const U, "wrong guard");
-        // SAFETY: The guard protects the exclusive access to the inner value.
-        unsafe { self.inner.get().as_mut() }.unwrap()
-    }
-}
-
 macro_rules! might_sleep {
     () => {
         assert_eq!(
@@ -113,6 +85,5 @@ macro_rules! might_sleep {
     };
 }
 
-use core::cell::UnsafeCell;
-
+pub use locked::{AsRefMutPosition, AsRefPosition, Locked, RefMutPosition, RefPosition};
 pub(crate) use might_sleep;

+ 2 - 2
src/sync/condvar.rs

@@ -66,7 +66,7 @@ impl<const I: bool> CondVar<I> {
     ///
     /// # Return
     /// - `true`: a pending signal was received
-    pub fn wait<'a, T, S: LockStrategy>(&self, guard: &mut Guard<'a, T, S>) {
+    pub fn wait<'a, T, S: LockStrategy, const W: bool>(&self, guard: &mut Guard<'a, T, S, W>) {
         preempt::disable();
         {
             let mut scheduler = Scheduler::get().lock_irq();
@@ -88,6 +88,6 @@ impl<const I: bool> CondVar<I> {
 
         self.waiters
             .lock_irq()
-            .retain(|waiter| waiter != Thread::current().as_ref());
+            .retain(|waiter| waiter.tid != Thread::current().tid);
     }
 }

+ 58 - 2
src/sync/lock.rs

@@ -3,7 +3,12 @@ use core::{
     ops::{Deref, DerefMut},
 };
 
-use super::{spin::IrqStrategy, strategy::LockStrategy};
+use super::{
+    semaphore::{RwSemaphoreStrategy, SemaphoreStrategy},
+    spin::IrqStrategy,
+    strategy::LockStrategy,
+    RwSemWriteGuard, SemGuard,
+};
 
 pub struct Lock<Value: ?Sized, Strategy: LockStrategy> {
     strategy_data: Strategy::StrategyData,
@@ -49,7 +54,58 @@ impl<Value: Default, Strategy: LockStrategy> Default for Lock<Value, Strategy> {
     }
 }
 
+#[allow(dead_code)]
+impl<Value: ?Sized> Lock<Value, SemaphoreStrategy> {
+    #[inline(always)]
+    pub fn lock_nosleep(&self) -> SemGuard<'_, Value> {
+        loop {
+            if !self.is_locked() {
+                if let Some(guard) = self.try_lock() {
+                    return guard;
+                }
+            }
+
+            arch::pause();
+        }
+    }
+}
+
+impl<Value: ?Sized> Lock<Value, RwSemaphoreStrategy> {
+    #[inline(always)]
+    pub fn lock_nosleep(&self) -> RwSemWriteGuard<'_, Value> {
+        loop {
+            if !self.is_locked() {
+                if let Some(guard) = self.try_lock() {
+                    return guard;
+                }
+            }
+
+            arch::pause();
+        }
+    }
+}
+
+#[allow(dead_code)]
 impl<Value: ?Sized, Strategy: LockStrategy> Lock<Value, Strategy> {
+    #[inline(always)]
+    pub fn is_locked(&self) -> bool {
+        unsafe { Strategy::is_locked(&self.strategy_data) }
+    }
+
+    #[inline(always)]
+    pub fn try_lock<'lt>(&'lt self) -> Option<Guard<'lt, Value, Strategy>> {
+        if unsafe { Strategy::is_locked(&self.strategy_data) } {
+            return None;
+        }
+
+        unsafe { Strategy::try_lock(&self.strategy_data) }.map(|context| Guard {
+            _phantom: core::marker::PhantomData,
+            value: &self.value,
+            strategy_data: &self.strategy_data,
+            context,
+        })
+    }
+
     #[inline(always)]
     pub fn lock<'lt>(&'lt self) -> Guard<'lt, Value, Strategy> {
         Guard {
@@ -103,7 +159,7 @@ pub struct Guard<'lock, Value: ?Sized, Strategy: LockStrategy, const WRITE: bool
     context: Strategy::GuardContext,
 }
 
-impl<'lock, Value: ?Sized, Strategy: LockStrategy> Guard<'lock, Value, Strategy> {
+impl<'lock, Value: ?Sized, Strategy: LockStrategy, const W: bool> Guard<'lock, Value, Strategy, W> {
     /// # Safety
     /// Use of the lock after calling this function without relocking is undefined behavior.
     #[inline(always)]

+ 150 - 0
src/sync/locked.rs

@@ -0,0 +1,150 @@
+use core::{cell::UnsafeCell, marker::PhantomData};
+
+use super::{lock::Guard, strategy::LockStrategy};
+
+pub struct RefMutPosition<'pos, T: ?Sized> {
+    address: *const T,
+    _phantom: PhantomData<&'pos ()>,
+}
+
+pub struct RefPosition<'pos, T: ?Sized> {
+    address: *const T,
+    _phantom: PhantomData<&'pos ()>,
+}
+
+pub trait AsRefMutPosition<'guard, 'pos, T: ?Sized>: 'guard {
+    fn as_pos_mut(&self) -> RefMutPosition<'pos, T>
+    where
+        'guard: 'pos;
+}
+
+pub trait AsRefPosition<'guard, 'pos, T: ?Sized>: 'guard {
+    fn as_pos(&self) -> RefPosition<'pos, T>
+    where
+        'guard: 'pos;
+}
+
+unsafe impl<T: Sized + Send, U: ?Sized> Send for Locked<T, U> {}
+unsafe impl<T: Sized + Send + Sync, U: ?Sized> Sync for Locked<T, U> {}
+pub struct Locked<T: Sized, U: ?Sized> {
+    inner: UnsafeCell<T>,
+    guard: *const U,
+}
+
+impl<T: ?Sized> Copy for RefPosition<'_, T> {}
+impl<T: ?Sized> Clone for RefPosition<'_, T> {
+    fn clone(&self) -> Self {
+        Self {
+            address: self.address,
+            _phantom: self._phantom,
+        }
+    }
+}
+
+impl<T: ?Sized> Copy for RefMutPosition<'_, T> {}
+impl<T: ?Sized> Clone for RefMutPosition<'_, T> {
+    fn clone(&self) -> Self {
+        Self {
+            address: self.address,
+            _phantom: self._phantom,
+        }
+    }
+}
+
+impl<'lock, 'pos, T: ?Sized> AsRefMutPosition<'lock, 'pos, T> for &'lock mut T {
+    fn as_pos_mut(&self) -> RefMutPosition<'pos, T>
+    where
+        'lock: 'pos,
+    {
+        RefMutPosition {
+            address: *self as *const T,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'lock, 'pos, T, S> AsRefMutPosition<'lock, 'pos, T> for Guard<'lock, T, S, true>
+where
+    T: ?Sized,
+    S: LockStrategy + 'lock,
+{
+    fn as_pos_mut(&self) -> RefMutPosition<'pos, T>
+    where
+        'lock: 'pos,
+    {
+        RefMutPosition {
+            address: &raw const **self,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'lock, 'pos, T: ?Sized> AsRefPosition<'lock, 'pos, T> for &'lock T {
+    fn as_pos(&self) -> RefPosition<'pos, T>
+    where
+        'lock: 'pos,
+    {
+        RefPosition {
+            address: *self as *const T,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'lock, 'pos, T: ?Sized> AsRefPosition<'lock, 'pos, T> for &'lock mut T {
+    fn as_pos(&self) -> RefPosition<'pos, T>
+    where
+        'lock: 'pos,
+    {
+        RefPosition {
+            address: *self as *const T,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<'lock, 'pos, T, S, const B: bool> AsRefPosition<'lock, 'pos, T> for Guard<'lock, T, S, B>
+where
+    T: ?Sized,
+    S: LockStrategy + 'lock,
+{
+    fn as_pos(&self) -> RefPosition<'pos, T>
+    where
+        'lock: 'pos,
+    {
+        RefPosition {
+            address: &raw const **self,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<T: Sized, U: ?Sized> core::fmt::Debug for Locked<T, U> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("Locked")
+            .field("value", &self.inner)
+            .field("guard", &self.guard)
+            .finish()
+    }
+}
+
+impl<T: Sized + Sync, U: ?Sized> Locked<T, U> {
+    pub fn new(value: T, guard: *const U) -> Self {
+        Self {
+            inner: UnsafeCell::new(value),
+            guard,
+        }
+    }
+
+    pub fn access<'lt>(&'lt self, guard: RefPosition<'lt, U>) -> &'lt T {
+        assert_eq!(self.guard, guard.address, "Locked: Wrong guard");
+        // SAFETY: The guard protects the shared access to the inner value.
+        unsafe { self.inner.get().as_ref() }.unwrap()
+    }
+
+    pub fn access_mut<'lt>(&'lt self, guard: RefMutPosition<'lt, U>) -> &'lt mut T {
+        assert_eq!(self.guard, guard.address, "Locked: Wrong guard");
+        // SAFETY: The guard protects the exclusive access to the inner value.
+        unsafe { self.inner.get().as_mut() }.unwrap()
+    }
+}
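A minimal usage sketch of the position-token pattern defined above; the `ProcessList`/`ProcessGroup` shapes here are simplified stand-ins, not the real ones from this tree. A `Locked` field records the address of its guarding object at construction and can only be accessed again through a `RefPosition`/`RefMutPosition` derived from a reference or lock guard of that same object:

use crate::sync::{AsRefMutPosition as _, AsRefPosition as _, Locked};

struct ProcessList {
    // ... fields guarded by the global process list lock ...
}

struct ProcessGroup {
    pgid: u32,
    // Only readable/writable while the owning ProcessList is borrowed.
    members: Locked<Vec<u32>, ProcessList>,
}

impl ProcessGroup {
    fn new(pgid: u32, procs: &ProcessList) -> Self {
        Self {
            pgid,
            members: Locked::new(Vec::new(), procs),
        }
    }

    fn member_pids(&self, procs: &ProcessList) -> Vec<u32> {
        // Panics with "Locked: Wrong guard" if `procs` is not the list this
        // group was constructed against.
        self.members.access(procs.as_pos()).clone()
    }

    fn add_member(&self, pid: u32, procs: &mut ProcessList) {
        self.members.access_mut(procs.as_pos_mut()).push(pid);
    }
}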

+ 34 - 0
src/sync/semaphore.rs

@@ -28,6 +28,23 @@ unsafe impl<const MAX: usize> LockStrategy for SemaphoreStrategy<MAX> {
         }
     }
 
+    #[inline(always)]
+    unsafe fn is_locked(data: &Self::StrategyData) -> bool {
+        *data.counter.lock() == MAX
+    }
+
+    #[inline(always)]
+    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext> {
+        let mut counter = data.counter.lock();
+        assert!(*counter <= MAX);
+        if *counter < MAX {
+            *counter += 1;
+            Some(())
+        } else {
+            None
+        }
+    }
+
     #[inline(always)]
     /// Acquire the semaphore in write mode
     ///
@@ -88,6 +105,23 @@ unsafe impl<const READ_MAX: isize> LockStrategy for RwSemaphoreStrategy<READ_MAX
     type StrategyData = RwSemaphoreData;
     type GuardContext = ();
 
+    #[inline(always)]
+    unsafe fn is_locked(data: &Self::StrategyData) -> bool {
+        *data.counter.lock() != 0
+    }
+
+    #[inline(always)]
+    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext> {
+        let mut counter = data.counter.lock();
+        assert!(*counter >= -1 && *counter <= READ_MAX);
+        if *counter == 0 {
+            *counter -= 1;
+            Some(())
+        } else {
+            None
+        }
+    }
+
     #[inline(always)]
     fn data() -> Self::StrategyData {
         RwSemaphoreData {

+ 35 - 0
src/sync/spin.rs

@@ -23,6 +23,23 @@ unsafe impl LockStrategy for SpinStrategy {
         AtomicBool::new(false)
     }
 
+    #[inline(always)]
+    unsafe fn is_locked(data: &Self::StrategyData) -> bool {
+        data.load(Ordering::Relaxed)
+    }
+
+    #[inline(always)]
+    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext> {
+        use Ordering::{Acquire, Relaxed};
+        preempt::disable();
+
+        if data.compare_exchange(false, true, Acquire, Relaxed).is_ok() {
+            Some(())
+        } else {
+            None
+        }
+    }
+
     #[inline(always)]
     unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext {
         use Ordering::{Acquire, Relaxed};
@@ -92,4 +109,22 @@ unsafe impl<Strategy: LockStrategy> LockStrategy for IrqStrategy<Strategy> {
     unsafe fn do_relock(data: &Self::StrategyData, context: &mut Self::GuardContext) {
         Strategy::do_relock(data, &mut context.0);
     }
+
+    #[inline(always)]
+    unsafe fn is_locked(data: &Self::StrategyData) -> bool {
+        Strategy::is_locked(data)
+    }
+
+    #[inline(always)]
+    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext> {
+        let mut irq_context: usize;
+        asm!(
+            "pushf",
+            "pop {context}",
+            "cli",
+            context = out(reg) irq_context,
+        );
+
+        Strategy::try_lock(data).map(|lock_context| (lock_context, irq_context))
+    }
 }

+ 7 - 12
src/sync/strategy.rs

@@ -4,30 +4,25 @@ pub unsafe trait LockStrategy {
 
     fn data() -> Self::StrategyData;
 
+    unsafe fn is_locked(data: &Self::StrategyData) -> bool;
+
+    unsafe fn try_lock(data: &Self::StrategyData) -> Option<Self::GuardContext>;
+
     unsafe fn do_lock(data: &Self::StrategyData) -> Self::GuardContext;
 
-    unsafe fn do_unlock(
-        data: &Self::StrategyData,
-        context: &mut Self::GuardContext,
-    );
+    unsafe fn do_unlock(data: &Self::StrategyData, context: &mut Self::GuardContext);
 
     unsafe fn do_lock_shared(data: &Self::StrategyData) -> Self::GuardContext {
         Self::do_lock(data)
     }
 
     #[inline(always)]
-    unsafe fn do_temporary_unlock(
-        data: &Self::StrategyData,
-        context: &mut Self::GuardContext,
-    ) {
+    unsafe fn do_temporary_unlock(data: &Self::StrategyData, context: &mut Self::GuardContext) {
         Self::do_unlock(data, context);
     }
 
     #[inline(always)]
-    unsafe fn do_relock(
-        data: &Self::StrategyData,
-        context: &mut Self::GuardContext,
-    ) {
+    unsafe fn do_relock(data: &Self::StrategyData, context: &mut Self::GuardContext) {
         *context = Self::do_lock(data);
     }
 }
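The two new hooks surface through `Lock::is_locked()`/`try_lock()` and the `lock_nosleep()` helpers added earlier in this diff. A small hedged example of the non-blocking path; `Spin<T>` is the spin-lock alias used throughout this diff, and `Guard` implementing `DerefMut` for write guards is assumed:

// Opportunistic update: take the lock only if nobody holds it right now and
// skip this round otherwise, instead of spinning or sleeping.
fn bump_counter(stats: &Spin<u64>) -> bool {
    match stats.try_lock() {
        Some(mut guard) => {
            *guard += 1;
            true
        }
        // Already held, or the owner changed between the `is_locked` check
        // and the actual lock attempt; the caller retries later.
        None => false,
    }
}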