
change(mm_list): separate mm_list from process on elf loading

greatbridf 3 months ago
parent
commit
f5b4da3309

+ 41 - 68
src/elf.rs

@@ -236,20 +236,15 @@ impl ParsedElf32 {
         })
     }
 
-    /// Load the ELF file into memory. Return the entry point address.
+    /// Load the ELF file into memory. Return the entry point address and the memory list containing the program data.
     ///
     /// We clear the user space and load the program headers into memory.
    /// From this point on, there is no way to roll back if loading fails.
     ///
     /// # Return
-    /// `(entry_ip, sp)`
-    pub fn load(
-        self,
-        mm_list: &MMList,
-        args: Vec<CString>,
-        envs: Vec<CString>,
-    ) -> KResult<(VAddr, VAddr)> {
-        mm_list.clear_user();
+    /// `(entry_ip, sp, mm_list)`
+    pub fn load(self, args: Vec<CString>, envs: Vec<CString>) -> KResult<(VAddr, VAddr, MMList)> {
+        let mm_list = MMList::new();
 
         let mut data_segment_end = VAddr(0);
         for phent in self
@@ -312,67 +307,45 @@ impl ParsedElf32 {
             },
         )?;
 
-        // TODO!!!!!: A temporary workaround.
-        mm_list.switch_page_table();
-
-        let mut sp = 0xc0000000u32;
-        let arg_addrs = args
-            .into_iter()
-            .map(|arg| push_string(&mut sp, arg))
-            .collect::<Vec<_>>();
-
-        let env_addrs = envs
-            .into_iter()
-            .map(|env| push_string(&mut sp, env))
-            .collect::<Vec<_>>();
-
-        let longs = 2 // Null auxiliary vector entry
-            + env_addrs.len() + 1 // Envs + null
-            + arg_addrs.len() + 1 // Args + null
-            + 1; // argc
-
-        sp -= longs as u32 * 4;
-        sp &= !0xf; // Align to 16 bytes
-
-        let mut cursor = (0..longs)
-            .map(|idx| UserPointerMut::<u32>::new_vaddr(sp as usize + size_of::<u32>() * idx));
-
-        // argc
-        cursor.next().unwrap()?.write(arg_addrs.len() as u32)?;
-
-        // args
-        for arg_addr in arg_addrs.into_iter() {
-            cursor.next().unwrap()?.write(arg_addr)?;
-        }
-        cursor.next().unwrap()?.write(0)?; // null
-
-        // envs
-        for env_addr in env_addrs.into_iter() {
-            cursor.next().unwrap()?.write(env_addr)?;
-        }
-        cursor.next().unwrap()?.write(0)?; // null
-
-        // Null auxiliary vector
-        cursor.next().unwrap()?.write(0)?; // AT_NULL
-        cursor.next().unwrap()?.write(0)?; // AT_NULL
-
-        // TODO!!!!!: A temporary workaround.
-        Thread::current().process.mm_list.switch_page_table();
-
-        assert!(cursor.next().is_none());
-        Ok((VAddr(self.entry as usize), VAddr(sp as usize)))
+        let mut sp = VAddr::from(0xc0000000); // Current stack top
+        let arg_addrs = push_strings(&mm_list, &mut sp, args)?;
+        let env_addrs = push_strings(&mm_list, &mut sp, envs)?;
+
+        let mut longs = vec![];
+        longs.push(arg_addrs.len() as u32); // argc
+        longs.extend(arg_addrs.into_iter()); // args
+        longs.push(0); // null
+        longs.extend(env_addrs.into_iter()); // envs
+        longs.push(0); // null
+        longs.push(0); // AT_NULL
+        longs.push(0); // AT_NULL
+
+        sp = sp - longs.len() * size_of::<u32>();
+        sp = VAddr::from(usize::from(sp) & !0xf); // Align to 16 bytes
+
+        mm_list.access_mut(sp, longs.len() * size_of::<u32>(), |offset, data| {
+            data.copy_from_slice(unsafe {
+                core::slice::from_raw_parts(
+                    longs.as_ptr().byte_add(offset) as *const u8,
+                    data.len(),
+                )
+            })
+        })?;
+
+        Ok((VAddr(self.entry as usize), sp, mm_list))
     }
 }
 
-fn push_string(sp: &mut u32, string: CString) -> u32 {
-    let data = string.as_bytes_with_nul();
-    let new_sp = (*sp - data.len() as u32) & !0x3; // Align to 4 bytes
-
-    CheckedUserPointer::new(new_sp as *const u8, data.len())
-        .unwrap()
-        .write(data.as_ptr() as _, data.len())
-        .unwrap();
+fn push_strings(mm_list: &MMList, sp: &mut VAddr, strings: Vec<CString>) -> KResult<Vec<u32>> {
+    let mut addrs = vec![];
+    for string in strings {
+        let len = string.as_bytes_with_nul().len();
+        *sp = *sp - len;
+        mm_list.access_mut(*sp, len, |offset, data| {
+            data.copy_from_slice(&string.as_bytes_with_nul()[offset..offset + data.len()])
+        })?;
+        addrs.push(usize::from(*sp) as u32);
+    }
 
-    *sp = new_sp;
-    new_sp
+    Ok(addrs)
 }
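
The `longs` array built in `load` above is the System V i386 initial process stack: argc, the argv pointers, a NULL, the envp pointers, a NULL, and one empty auxiliary vector entry. A minimal, self-contained sketch of that layout (plain std Rust, outside the kernel; `initial_stack_words` and the addresses are illustrative only, the real pointers come from `push_strings`):

    // Hypothetical helper mirroring the word layout that `load` copies to the
    // top of the user stack.
    fn initial_stack_words(arg_addrs: &[u32], env_addrs: &[u32]) -> Vec<u32> {
        let mut longs = Vec::new();
        longs.push(arg_addrs.len() as u32); // argc
        longs.extend_from_slice(arg_addrs); // argv[0..argc]
        longs.push(0);                      // argv NULL terminator
        longs.extend_from_slice(env_addrs); // envp entries
        longs.push(0);                      // envp NULL terminator
        longs.extend_from_slice(&[0, 0]);   // empty auxiliary vector (AT_NULL, 0)
        longs
    }

    fn main() {
        // One argument string already copied to an illustrative address, no environment.
        let words = initial_stack_words(&[0xbfff_fff4], &[]);
        assert_eq!(words, [1, 0xbfff_fff4, 0, 0, 0, 0]);
    }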

+ 92 - 1
src/kernel/mem/mm_area.rs

@@ -1,6 +1,12 @@
+use bindings::PA_MMAP;
+
+use crate::{kernel::task::Signal, prelude::*};
+
 use core::{borrow::Borrow, cell::UnsafeCell, cmp::Ordering};
 
-use super::{Mapping, Permission, VAddr, VRange};
+use crate::bindings::root::{PA_A, PA_ANON, PA_COW, PA_P, PA_RW};
+
+use super::{Mapping, Page, PageBuffer, Permission, VAddr, VRange, PTE};
 
 #[derive(Debug)]
 pub struct MMArea {
@@ -76,6 +82,91 @@ impl MMArea {
             }
         }
     }
+
+    /// # Return
+    /// Whether the whole handling process is done.
+    pub fn handle_cow(&self, pte: &mut PTE) -> bool {
+        let mut attributes = pte.attributes();
+        let mut pfn = pte.pfn();
+
+        attributes &= !PA_COW as usize;
+        if self.permission.write {
+            attributes |= PA_RW as usize;
+        } else {
+            attributes &= !PA_RW as usize;
+        }
+
+        let page = unsafe { Page::take_pfn(pfn, 0) };
+        if unsafe { page.load_refcount() } == 1 {
+            // SAFETY: This is actually safe. If we read `1` here while holding the
+            // `MMList` lock, there can be neither other processes sharing the page
+            // nor other threads making the page COW at the same time.
+            pte.set_attributes(attributes);
+            core::mem::forget(page);
+            return true;
+        }
+
+        let new_page = Page::alloc_one();
+        if attributes & PA_ANON as usize != 0 {
+            new_page.zero();
+        } else {
+            new_page.as_mut_slice().copy_from_slice(page.as_slice());
+        }
+
+        attributes &= !(PA_A | PA_ANON) as usize;
+
+        pfn = new_page.into_pfn();
+        pte.set(pfn, attributes);
+
+        false
+    }
+
+    /// # Arguments
+    /// * `offset`: The offset from the start of the mapping, aligned to 4KB boundary.
+    pub fn handle_mmap(&self, pte: &mut PTE, offset: usize) -> KResult<()> {
+        // TODO: Implement shared mapping
+        let mut attributes = pte.attributes();
+        let pfn = pte.pfn();
+
+        attributes |= PA_P as usize;
+
+        match &self.mapping {
+            Mapping::File(mapping) if offset < mapping.length => {
+                // SAFETY: Since we are here, the `pfn` must refer to a valid buddy page.
+                let page = unsafe { Page::from_pfn(pfn, 0) };
+                let nread = mapping
+                    .file
+                    .read(&mut PageBuffer::new(page.clone()), mapping.offset + offset)?;
+
+                if nread < page.len() {
+                    page.as_mut_slice()[nread..].fill(0);
+                }
+
+                if mapping.length - offset < 0x1000 {
+                    let length_to_end = mapping.length - offset;
+                    page.as_mut_slice()[length_to_end..].fill(0);
+                }
+            }
+            Mapping::File(_) => panic!("Offset out of range"),
+            _ => panic!("Anonymous mapping should not be PA_MMAP"),
+        }
+
+        attributes &= !PA_MMAP as usize;
+        pte.set_attributes(attributes);
+        Ok(())
+    }
+
+    pub fn handle(&self, pte: &mut PTE, offset: usize) -> KResult<()> {
+        if pte.is_cow() {
+            self.handle_cow(pte);
+        }
+
+        if pte.is_mmap() {
+            self.handle_mmap(pte, offset)?;
+        }
+
+        Ok(())
+    }
 }
 
 impl Eq for MMArea {}
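
For file-backed mappings, `handle_mmap` reads at most one page from the file and zeroes whatever the read did not cover, plus everything past `mapping.length` inside the mapping's last page. A standalone sketch of that fill rule (plain Rust; `fill_mapped_page` and `read_at` are hypothetical stand-ins for the kernel's dentry read, not part of this commit):

    const PAGE_SIZE: usize = 0x1000;

    /// Hypothetical model of the page-fill rule; `read_at` returns the number
    /// of bytes read, and the caller guarantees `offset < length`.
    fn fill_mapped_page(
        read_at: impl Fn(usize, &mut [u8]) -> usize,
        file_offset: usize, // mapping.offset + offset
        length: usize,      // mapping.length
        offset: usize,      // page-aligned offset within the mapping
        page: &mut [u8; PAGE_SIZE],
    ) {
        let nread = read_at(file_offset, &mut page[..]);
        if nread < PAGE_SIZE {
            page[nread..].fill(0); // short read: zero the rest of the page
        }
        if length - offset < PAGE_SIZE {
            let length_to_end = length - offset;
            page[length_to_end..].fill(0); // zero past the end of the mapping
        }
    }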

+ 159 - 61
src/kernel/mem/mm_list.rs

@@ -1,23 +1,25 @@
 mod page_fault;
 
-use crate::prelude::*;
+use core::sync::atomic::{AtomicUsize, Ordering};
+
+use crate::{prelude::*, sync::ArcSwap};
 
 use alloc::{collections::btree_set::BTreeSet, sync::Arc};
-use bindings::{EEXIST, EINVAL, ENOMEM};
+use bindings::{EEXIST, EFAULT, EINVAL, ENOMEM, KERNEL_PML4};
 
 use crate::kernel::vfs::dentry::Dentry;
 
-use super::{MMArea, PageTable, VAddr, VRange};
+use super::{MMArea, Page, PageTable, VAddr, VRange};
 
 pub use page_fault::{handle_page_fault, PageFaultError};
 
 #[derive(Debug, Clone)]
 pub struct FileMapping {
-    file: Arc<Dentry>,
+    pub file: Arc<Dentry>,
     /// Offset in the file, aligned to 4KB boundary.
-    offset: usize,
+    pub offset: usize,
     /// Length of the mapping. Exceeding part will be zeroed.
-    length: usize,
+    pub length: usize,
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -35,6 +37,7 @@ pub enum Mapping {
 #[derive(Debug)]
 struct MMListInner {
     areas: BTreeSet<MMArea>,
+    page_table: PageTable,
     break_start: Option<VRange>,
     break_pos: Option<VAddr>,
 }
@@ -43,9 +46,9 @@ struct MMListInner {
 pub struct MMList {
     /// # Safety
     /// This field might be used in IRQ context, so it should be locked with `lock_irq()`.
-    inner: Mutex<MMListInner>,
-    /// Do not modify entries in the page table without acquiring the `inner` lock.
-    page_table: PageTable,
+    inner: ArcSwap<Mutex<MMListInner>>,
+    /// Only used in kernel space to switch page tables on context switch.
+    root_page_table: AtomicUsize,
 }
 
 impl FileMapping {
@@ -110,7 +113,7 @@ impl MMListInner {
         }
     }
 
-    fn unmap(&mut self, page_table: &PageTable, start: VAddr, len: usize) -> KResult<()> {
+    fn unmap(&mut self, start: VAddr, len: usize) -> KResult<()> {
         assert_eq!(start.floor(), start);
         let end = (start + len).ceil();
         let range = VRange::new(start, end);
@@ -128,7 +131,7 @@ impl MMListInner {
             }
             if area.range() == range.start().into() {
                 let (left, right) = area.clone().split(range.start());
-                page_table.unmap(&right.unwrap());
+                self.page_table.unmap(&right.unwrap());
 
                 if let Some(left) = left {
                     assert!(
@@ -138,7 +141,7 @@ impl MMListInner {
                 }
             } else if area.range() == range.end().into() {
                 let (left, right) = area.clone().split(range.end());
-                page_table.unmap(&left.unwrap());
+                self.page_table.unmap(&left.unwrap());
 
                 assert!(
                     back_remaining
@@ -147,7 +150,7 @@ impl MMListInner {
                     "There should be only one `back`."
                 );
             } else {
-                page_table.unmap(area);
+                self.page_table.unmap(area);
             }
 
             false
@@ -165,7 +168,6 @@ impl MMListInner {
 
     fn mmap(
         &mut self,
-        page_table: &PageTable,
         at: VAddr,
         len: usize,
         mapping: Mapping,
@@ -181,8 +183,8 @@ impl MMListInner {
         }
 
         match &mapping {
-            Mapping::Anonymous => page_table.set_anonymous(range, permission),
-            Mapping::File(_) => page_table.set_mmapped(range, permission),
+            Mapping::Anonymous => self.page_table.set_anonymous(range, permission),
+            Mapping::File(_) => self.page_table.set_mmapped(range, permission),
         }
 
         self.areas.insert(MMArea::new(range, mapping, permission));
@@ -191,36 +193,41 @@ impl MMListInner {
 }
 
 impl MMList {
-    pub fn new() -> Arc<Self> {
-        Arc::new(Self {
-            inner: Mutex::new(MMListInner {
+    pub fn new() -> Self {
+        let page_table = PageTable::new();
+        Self {
+            root_page_table: AtomicUsize::from(page_table.root_page_table()),
+            inner: ArcSwap::new(Mutex::new(MMListInner {
                 areas: BTreeSet::new(),
+                page_table,
                 break_start: None,
                 break_pos: None,
-            }),
-            page_table: PageTable::new(),
-        })
+            })),
+        }
     }
 
-    pub fn new_cloned(&self) -> Arc<Self> {
-        let inner = self.inner.lock_irq();
+    pub fn new_cloned(&self) -> Self {
+        let inner = self.inner.borrow();
+        let inner = inner.lock_irq();
 
-        let list = Arc::new(Self {
-            inner: Mutex::new(MMListInner {
+        let page_table = PageTable::new();
+        let list = Self {
+            root_page_table: AtomicUsize::from(page_table.root_page_table()),
+            inner: ArcSwap::new(Mutex::new(MMListInner {
                 areas: inner.areas.clone(),
+                page_table,
                 break_start: inner.break_start,
                 break_pos: inner.break_pos,
-            }),
-            page_table: PageTable::new(),
-        });
+            })),
+        };
 
-        // SAFETY: `self.inner` already locked with IRQ disabled.
         {
-            let list_inner = list.inner.lock();
+            let list_inner = list.inner.borrow();
+            let list_inner = list_inner.lock();
 
             for area in list_inner.areas.iter() {
-                let new_iter = list.page_table.iter_user(area.range()).unwrap();
-                let old_iter = self.page_table.iter_user(area.range()).unwrap();
+                let new_iter = list_inner.page_table.iter_user(area.range()).unwrap();
+                let old_iter = inner.page_table.iter_user(area.range()).unwrap();
 
                 for (new, old) in new_iter.zip(old_iter) {
                     new.setup_cow(old);
@@ -229,29 +236,54 @@ impl MMList {
         }
 
         // We set some pages as COW, so we need to invalidate TLB.
-        self.page_table.lazy_invalidate_tlb_all();
+        inner.page_table.lazy_invalidate_tlb_all();
 
         list
     }
 
-    /// No need to do invalidation manually, `PageTable` already does it.
-    pub fn clear_user(&self) {
-        let mut inner = self.inner.lock_irq();
-        inner.areas.retain(|area| {
-            self.page_table.unmap(area);
-            false
-        });
-        inner.break_start = None;
-        inner.break_pos = None;
+    pub fn switch_page_table(&self) {
+        let root_page_table = self.root_page_table.load(Ordering::Relaxed);
+        assert_ne!(root_page_table, 0);
+        arch::set_root_page_table(root_page_table);
     }
 
-    pub fn switch_page_table(&self) {
-        self.page_table.switch();
+    pub fn replace(&self, new: Self) {
+        // Switch to the kernel page table in case we are currently using the page
+        // table that is about to be swapped out and released.
+        let mut switched = false;
+        if arch::get_root_page_table() == self.root_page_table.load(Ordering::Relaxed) {
+            arch::set_root_page_table(KERNEL_PML4 as usize);
+            switched = true;
+        }
+
+        unsafe {
+            // SAFETY: Even if we're using the page table, we've switched to kernel page table.
+            // So it's safe to release the old memory list.
+            self.release();
+        }
+
+        // SAFETY: `self.inner` should be `None` after releasing.
+        self.inner.swap(Some(new.inner.borrow().clone()));
+        self.root_page_table.store(
+            new.root_page_table.load(Ordering::Relaxed),
+            Ordering::Relaxed,
+        );
+
+        if switched {
+            self.switch_page_table();
+        }
+    }
+
+    /// # Safety
+    /// This function is unsafe because the caller should make sure that the `inner` is not currently used.
+    pub unsafe fn release(&self) {
+        // TODO: Check whether we should wake someone up if they've been put to sleep when calling `vfork`.
+        self.inner.swap(None);
+        self.root_page_table.store(0, Ordering::Relaxed);
     }
 
     /// No need to do invalidation manually, `PageTable` already does it.
     pub fn unmap(&self, start: VAddr, len: usize) -> KResult<()> {
-        self.inner.lock_irq().unmap(&self.page_table, start, len)
+        self.inner.borrow().lock_irq().unmap(start, len)
     }
 
     pub fn mmap_hint(
@@ -261,18 +293,20 @@ impl MMList {
         mapping: Mapping,
         permission: Permission,
     ) -> KResult<VAddr> {
-        let mut inner = self.inner.lock_irq();
+        let inner = self.inner.borrow();
+        let mut inner = inner.lock_irq();
+
         if hint == VAddr::NULL {
             let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-            inner.mmap(&self.page_table, at, len, mapping, permission)?;
+            inner.mmap(at, len, mapping, permission)?;
             return Ok(at);
         }
 
-        match inner.mmap(&self.page_table, hint, len, mapping.clone(), permission) {
+        match inner.mmap(hint, len, mapping.clone(), permission) {
             Ok(()) => Ok(hint),
             Err(EEXIST) => {
                 let at = inner.find_available(hint, len).ok_or(ENOMEM)?;
-                inner.mmap(&self.page_table, at, len, mapping, permission)?;
+                inner.mmap(at, len, mapping, permission)?;
                 Ok(at)
             }
             Err(err) => Err(err),
@@ -287,13 +321,15 @@ impl MMList {
         permission: Permission,
     ) -> KResult<VAddr> {
         self.inner
+            .borrow()
             .lock_irq()
-            .mmap(&self.page_table, at, len, mapping.clone(), permission)
+            .mmap(at, len, mapping.clone(), permission)
             .map(|_| at)
     }
 
     pub fn set_break(&self, pos: Option<VAddr>) -> VAddr {
-        let mut inner = self.inner.lock_irq();
+        let inner = self.inner.borrow();
+        let mut inner = inner.lock_irq();
 
         // SAFETY: `set_break` is only called in syscalls, where program break should be valid.
         assert!(inner.break_start.is_some() && inner.break_pos.is_some());
@@ -326,7 +362,7 @@ impl MMList {
             .expect("Program break area should be valid");
 
         let len = pos - current_break;
-        self.page_table.set_anonymous(
+        inner.page_table.set_anonymous(
             VRange::from(program_break.range().end()).grow(len),
             Permission {
                 write: true,
@@ -342,19 +378,81 @@ impl MMList {
 
     /// This should be called only **once** for every thread.
     pub fn register_break(&self, start: VAddr) {
-        let mut inner = self.inner.lock_irq();
+        let inner = self.inner.borrow();
+        let mut inner = inner.lock_irq();
         assert!(inner.break_start.is_none() && inner.break_pos.is_none());
 
         inner.break_start = Some(start.into());
         inner.break_pos = Some(start);
     }
-}
 
-impl Drop for MMList {
-    fn drop(&mut self) {
-        let inner = self.inner.get_mut();
-        assert!(inner.areas.is_empty());
-        assert_eq!(inner.break_start, None);
-        assert_eq!(inner.break_pos, None);
+    /// Access the user memory range `[start, start + len)` with the given function.
+    /// The function is called once per touched page with the byte offset into the
+    /// accessed range and a mutable slice of that page's data.
+    pub fn access_mut<F>(&self, start: VAddr, len: usize, func: F) -> KResult<()>
+    where
+        F: Fn(usize, &mut [u8]),
+    {
+        // First, validate the address range.
+        let end = start + len;
+        if !start.is_user() || !end.is_user() {
+            return Err(EINVAL);
+        }
+
+        let inner = self.inner.borrow();
+        let inner = inner.lock_irq();
+
+        let mut offset = 0;
+        let mut remaining = len;
+        let mut current = start;
+
+        while remaining > 0 {
+            let area = inner.overlapping_addr(current).ok_or(EFAULT)?;
+
+            let area_start = area.range().start();
+            let area_end = area.range().end();
+            let area_remaining = area_end - current;
+
+            let access_len = remaining.min(area_remaining);
+            let access_end = current + access_len;
+
+            for (idx, pte) in inner
+                .page_table
+                .iter_user(VRange::new(current, access_end))?
+                .enumerate()
+            {
+                let page_start = current.floor() + idx * 0x1000;
+                let page_end = page_start + 0x1000;
+
+                area.handle(pte, page_start - area_start)?;
+
+                // Clamp the slice to the part of this page that lies inside the range.
+                let start_offset = if page_start < current {
+                    current - page_start
+                } else {
+                    0
+                };
+                let end_offset = if page_end > access_end {
+                    access_end - page_start
+                } else {
+                    0x1000
+                };
+
+                unsafe {
+                    let page = Page::from_pfn(pte.pfn(), 0);
+                    func(
+                        // Byte offset into the accessed range of the slice passed below;
+                        // this accounts for `current` not being page-aligned.
+                        offset + (page_start + start_offset - current),
+                        &mut page.as_mut_slice()[start_offset..end_offset],
+                    );
+                }
+            }
+
+            offset += access_len;
+            remaining -= access_len;
+            current = access_end;
+        }
+
+        Ok(())
     }
 }
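
A usage sketch for the new `access_mut` (the `mm_list`, `dst_addr`, and `bytes` bindings are assumptions, not part of this change): the closure is handed the byte offset into the accessed range and a mutable slice of the backing page, so a kernel buffer can be copied into user memory without switching page tables:

    // Hypothetical caller; `mm_list: &MMList`, `dst_addr: VAddr`, `bytes: &[u8]`.
    mm_list.access_mut(dst_addr, bytes.len(), |offset, data| {
        // `data` may be shorter than `bytes` when the range crosses a page boundary.
        data.copy_from_slice(&bytes[offset..offset + data.len()]);
    })?;

Because the callback is bound by `Fn` rather than `FnMut`, it can fill the page slice it is given but cannot mutate captured state, so copying user memory out into a kernel buffer through this interface would need interior mutability (e.g. a `Cell` or `RefCell`).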

+ 6 - 72
src/kernel/mem/mm_list/page_fault.rs

@@ -36,7 +36,9 @@ impl MMList {
         addr: VAddr,
         error: PageFaultError,
     ) -> Result<(), Signal> {
-        let inner = self.inner.lock();
+        let inner = self.inner.borrow();
+        let inner = inner.lock();
+
         let area = match inner.areas.get(&VRange::from(addr)) {
             Some(area) => area,
             None => {
@@ -60,7 +62,7 @@ impl MMList {
             }
         }
 
-        let pte = self
+        let pte = inner
             .page_table
             .iter_user(VRange::new(addr.floor(), addr.floor() + 0x1000))
             .unwrap()
@@ -73,76 +75,8 @@ impl MMList {
             return Ok(());
         }
 
-        let mut pfn = pte.pfn();
-        let mut attributes = pte.attributes();
-
-        if attributes & PA_COW as usize != 0 {
-            attributes &= !PA_COW as usize;
-            if area.permission.write {
-                attributes |= PA_RW as usize;
-            } else {
-                attributes &= !PA_RW as usize;
-            }
-
-            let page = unsafe { Page::take_pfn(pfn, 0) };
-            if unsafe { page.load_refcount() } == 1 {
-                // SAFETY: This is actually safe. If we read `1` here and we have `MMList` lock
-                // held, there couldn't be neither other processes sharing the page, nor other
-                // threads making the page COW at the same time.
-                pte.set_attributes(attributes);
-                core::mem::forget(page);
-                return Ok(());
-            }
-
-            let new_page = Page::alloc_one();
-            if attributes & PA_ANON as usize != 0 {
-                new_page.zero();
-            } else {
-                new_page.as_mut_slice().copy_from_slice(page.as_slice());
-            }
-
-            attributes &= !(PA_A | PA_ANON) as usize;
-
-            pfn = new_page.into_pfn();
-            pte.set(pfn, attributes);
-        }
-
-        // TODO: shared mapping
-        if attributes & PA_MMAP as usize != 0 {
-            attributes |= PA_P as usize;
-
-            if let Mapping::File(mapping) = &area.mapping {
-                let load_offset = addr.floor() - area.range().start();
-                if load_offset < mapping.length {
-                    // SAFETY: Since we are here, the `pfn` must refer to a valid buddy page.
-                    let page = unsafe { Page::from_pfn(pfn, 0) };
-                    let nread = mapping
-                        .file
-                        .read(
-                            &mut PageBuffer::new(page.clone()),
-                            mapping.offset + load_offset,
-                        )
-                        .map_err(|_| Signal::SIGBUS)?;
-
-                    if nread < page.len() {
-                        page.as_mut_slice()[nread..].fill(0);
-                    }
-
-                    if mapping.length - load_offset < 0x1000 {
-                        let length_to_end = mapping.length - load_offset;
-                        page.as_mut_slice()[length_to_end..].fill(0);
-                    }
-                }
-                // Otherwise, the page is kept zero emptied.
-
-                attributes &= !PA_MMAP as usize;
-                pte.set_attributes(attributes);
-            } else {
-                panic!("Anonymous mapping should not be PA_MMAP");
-            }
-        }
-
-        Ok(())
+        area.handle(pte, addr.floor() - area.range().start())
+            .map_err(|_| Signal::SIGBUS)
     }
 }
 

+ 12 - 4
src/kernel/mem/page_table.rs

@@ -68,6 +68,14 @@ impl PTE {
         self.0 & PA_P != 0
     }
 
+    pub fn is_cow(&self) -> bool {
+        self.0 & PA_COW != 0
+    }
+
+    pub fn is_mmap(&self) -> bool {
+        self.0 & PA_MMAP != 0
+    }
+
     pub fn pfn(&self) -> usize {
         self.0 & !PA_MASK
     }
@@ -212,6 +220,10 @@ impl PageTable {
         Self { page }
     }
 
+    pub fn root_page_table(&self) -> usize {
+        self.page.as_phys()
+    }
+
     pub fn iter_user(&self, range: VRange) -> KResult<PTEIterator<'_, false>> {
         PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
@@ -220,10 +232,6 @@ impl PageTable {
         PTEIterator::new(&self.page, range.start().floor(), range.end().ceil())
     }
 
-    pub fn switch(&self) {
-        arch::set_root_page_table(self.page.as_phys())
-    }
-
     pub fn unmap(&self, area: &MMArea) {
         let range = area.range();
         let use_invlpg = range.len() / 4096 < 4;

+ 3 - 2
src/kernel/syscall/procops.rs

@@ -94,8 +94,9 @@ fn do_execve(exec: &[u8], argv: Vec<CString>, envp: Vec<CString>) -> KResult<(VA
     // TODO: When `execve` is called by one of the threads in a process, the other threads
     //       should be terminated and `execve` is performed in the thread group leader.
     let elf = ParsedElf32::parse(dentry.clone())?;
-    let result = elf.load(&Thread::current().process.mm_list, argv, envp);
-    if let Ok((ip, sp)) = result {
+    let result = elf.load(argv, envp);
+    if let Ok((ip, sp, mm_list)) = result {
+        Thread::current().process.mm_list.replace(mm_list);
         Thread::current().files.on_exec();
         Thread::current().signal_list.clear_non_ignore();
         Thread::current().set_name(dentry.name().clone());

+ 1 - 1
src/kernel/task/process.rs

@@ -29,7 +29,7 @@ pub struct Process {
     pub pid: u32,
 
     pub wait_list: WaitList,
-    pub mm_list: Arc<MMList>,
+    pub mm_list: MMList,
 
     /// Parent process
     ///

+ 7 - 2
src/kernel/task/process_list.rs

@@ -2,6 +2,7 @@ use alloc::{
     collections::btree_map::BTreeMap,
     sync::{Arc, Weak},
 };
+use bindings::KERNEL_PML4;
 
 use crate::{
     prelude::*,
@@ -145,8 +146,12 @@ impl ProcessList {
             }
         }
 
-        // Unmap all user memory areas
-        process.mm_list.clear_user();
+        // Release the MMList as well as the page table.
+        // Before we release the page table, we need to switch to the kernel page table.
+        arch::set_root_page_table(KERNEL_PML4 as usize);
+        unsafe {
+            process.mm_list.release();
+        }
 
         // Make children orphans (adopted by init)
         {

+ 3 - 3
src/lib.rs

@@ -164,7 +164,7 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
 
     unsafe { kernel::smp::bootstrap_smp() };
 
-    let (ip, sp) = {
+    let (ip, sp, mm_list) = {
         // mount fat32 /mnt directory
         let fs_context = FsContext::get_current();
         let mnt_dir = Dentry::open(&fs_context, Path::new(b"/mnt/").unwrap(), true).unwrap();
@@ -197,10 +197,10 @@ extern "C" fn init_process(/* early_kstack_pfn: usize */) {
         ];
 
         let elf = ParsedElf32::parse(init.clone()).unwrap();
-        elf.load(&Thread::current().process.mm_list, argv, envp)
-            .unwrap()
+        elf.load(argv, envp).unwrap()
     };
 
+    Thread::current().process.mm_list.replace(mm_list);
     Thread::current().files.open_console();
 
     unsafe {

+ 2 - 0
src/sync.rs

@@ -1,3 +1,4 @@
+mod arcswap;
 mod condvar;
 pub mod lock;
 mod locked;
@@ -85,5 +86,6 @@ macro_rules! might_sleep {
     };
 }
 
+pub use arcswap::ArcSwap;
 pub use locked::{AsRefMutPosition, AsRefPosition, Locked, RefMutPosition, RefPosition};
 pub(crate) use might_sleep;

+ 49 - 0
src/sync/arcswap.rs

@@ -0,0 +1,49 @@
+use core::{
+    fmt::{self, Debug, Formatter},
+    sync::atomic::{AtomicPtr, Ordering},
+};
+
+use alloc::sync::Arc;
+
+use crate::BorrowedArc;
+
+unsafe impl<T> Send for ArcSwap<T> where T: Send + Sync {}
+unsafe impl<T> Sync for ArcSwap<T> where T: Send + Sync {}
+
+pub struct ArcSwap<T> {
+    pointer: AtomicPtr<T>,
+}
+
+impl<T> ArcSwap<T> {
+    pub fn new(data: T) -> Self {
+        let pointer = Arc::into_raw(Arc::new(data));
+        Self {
+            pointer: AtomicPtr::new(pointer as *mut T),
+        }
+    }
+
+    /// # Safety
+    /// The caller must ensure that the old value is no longer used anywhere before
+    /// the returned `Arc` is actually dropped.
+    pub fn swap(&self, data: Option<Arc<T>>) -> Option<Arc<T>> {
+        let new_pointer = data.map(Arc::into_raw).unwrap_or(core::ptr::null());
+        let old_pointer = self.pointer.swap(new_pointer as *mut _, Ordering::AcqRel);
+        if old_pointer.is_null() {
+            None
+        } else {
+            Some(unsafe { Arc::from_raw(old_pointer) })
+        }
+    }
+
+    pub fn borrow(&self) -> BorrowedArc<T> {
+        BorrowedArc::from_raw(self.pointer.load(Ordering::Relaxed))
+    }
+}
+
+impl<T> Debug for ArcSwap<T>
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        write!(f, "ArcSwap {{ {:?} }}", self.borrow().as_ref())
+    }
+}