Explorar el Código

fix: fifo, paging, tkill, preemptive kernel

make syscall handler preemptive

fix fifo read and write

clear buddy order field while freeing

when an unrecoverable error occurs in elf32_load, do not kill the
        current process in place

add kmsgf_debug
greatbridf hace 9 meses
padre
commit
23ec09cb64

+ 4 - 4
CMakeLists.txt

@@ -14,11 +14,11 @@ SET(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp")
 set(CMAKE_CXX_STANDARD 20)
 
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0 -g")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDEBUG -O0 -g")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDEBUG -O0 -g")
 elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -DNDEBUG")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -DNDEBUG")
 endif()
 
 if (NOT DEFINED FDISK_BIN)

+ 6 - 0
include/kernel/log.hpp

@@ -12,3 +12,9 @@
     }
 
 #define kmsg(msg) if (kernel::tty::console) kernel::tty::console->print(msg "\n")
+
+#ifdef NDEBUG
+#define kmsgf_debug(...)
+#else
+#define kmsgf_debug(...) kmsgf(__VA_ARGS__)
+#endif

+ 9 - 2
include/kernel/signal.hpp

@@ -9,6 +9,7 @@
 
 #include <types/cplusplus.hpp>
 
+#include <kernel/async/lock.hpp>
 #include <kernel/interrupt.hpp>
 
 namespace kernel {
@@ -31,6 +32,7 @@ private:
     list_type m_list;
     sigmask_type m_mask { };
     std::map<signo_type, sigaction> m_handlers;
+    async::mutex m_mtx;
 
 public:
     static constexpr bool check_valid(signo_type sig)
@@ -40,8 +42,13 @@ public:
 
 public:
     constexpr signal_list() = default;
-    constexpr signal_list(const signal_list& val) = default;
-    constexpr signal_list(signal_list&& val) = default;
+    constexpr signal_list(const signal_list& val)
+        : m_list{val.m_list}, m_mask{val.m_mask}
+        , m_handlers{val.m_handlers}, m_mtx{} { }
+
+    constexpr signal_list(signal_list&& val)
+        : m_list{std::move(val.m_list)}, m_mask{std::move(val.m_mask)}
+        , m_handlers{std::move(val.m_handlers)}, m_mtx{} { }
 
     void on_exec();
 

+ 1 - 0
include/kernel/syscall.hpp

@@ -83,6 +83,7 @@ char __user* do_getcwd(char __user* buf, size_t buf_size);
 uintptr_t do_brk(uintptr_t addr);
 int do_umask(mode_t mask);
 int do_kill(pid_t pid, int sig);
+int do_tkill(pid_t pid, int sig);
 int do_rt_sigprocmask(int how, const kernel::sigmask_type __user* set,
         kernel::sigmask_type __user* oldset, size_t sigsetsize);
 int do_rt_sigaction(int signum, const sigaction __user* act,

+ 10 - 8
include/kernel/vfs/file.hpp

@@ -21,30 +21,32 @@ private:
 
 private:
     types::buffer buf;
-    kernel::async::wait_list waitlist;
-    kernel::async::mutex mtx;
     uint32_t flags;
+    kernel::async::mutex mtx;
+
+    kernel::async::wait_list waitlist_r;
+    kernel::async::wait_list waitlist_w;
 
 public:
-    pipe(void);
+    pipe();
 
-    void close_read(void);
-    void close_write(void);
+    void close_read();
+    void close_write();
 
     int write(const char* buf, size_t n);
     int read(char* buf, size_t n);
 
-    constexpr bool is_readable(void) const
+    constexpr bool is_readable() const
     {
         return flags & READABLE;
     }
 
-    constexpr bool is_writeable(void) const
+    constexpr bool is_writeable() const
     {
         return flags & WRITABLE;
     }
 
-    constexpr bool is_free(void) const
+    constexpr bool is_free() const
     {
         return !(flags & (READABLE | WRITABLE));
     }

+ 2 - 0
include/types/elf.hpp

@@ -19,6 +19,8 @@ constexpr elf32_addr_t ELF32_STACK_BOTTOM = 0xbffff000;
 constexpr elf32_off_t ELF32_STACK_SIZE = 8 * 1024 * 1024;
 constexpr elf32_addr_t ELF32_STACK_TOP = ELF32_STACK_BOTTOM - ELF32_STACK_SIZE;
 
+constexpr int ELF_LOAD_FAIL_NORETURN = 0x114514;
+
 struct PACKED elf32_header {
     // 0x7f, "ELF"
     char magic[4];

+ 1 - 1
src/kernel/mem/paging.cc

@@ -229,7 +229,7 @@ void kernel::mem::paging::free_pages(page* pg, unsigned order)
         if (buddy_page < pg)
             std::swap(buddy_page, pg);
 
-        buddy_page->flags &= ~PAGE_BUDDY;
+        buddy_page->flags &= ~(PAGE_BUDDY | 0xff);
         order++;
     }
 

+ 18 - 9
src/kernel/process.cpp

@@ -26,7 +26,7 @@
 #include <kernel/vfs.hpp>
 
 using kernel::async::mutex;
-using kernel::async::lock_guard, kernel::async::lock_guard_irq;
+using kernel::async::lock_guard;
 
 static void (*volatile kthreadd_new_thd_func)(void*);
 static void* volatile kthreadd_new_thd_data;
@@ -322,6 +322,8 @@ void proclist::kill(pid_t pid, int exit_code)
         freeze();
     }
 
+    kernel::async::preempt_disable();
+
     // put all threads into sleep
     for (auto& thd : proc.thds)
         thd.set_attr(kernel::task::thread::ZOMBIE);
@@ -343,10 +345,10 @@ void proclist::kill(pid_t pid, int exit_code)
 
     bool flag = false;
     if (1) {
-        lock_guard_irq lck(init.mtx_waitprocs);
+        lock_guard lck(init.mtx_waitprocs);
 
         if (1) {
-            lock_guard_irq lck(proc.mtx_waitprocs);
+            lock_guard lck(proc.mtx_waitprocs);
 
             for (const auto& item : proc.waitprocs) {
                 if (WIFSTOPPED(item.code) || WIFCONTINUED(item.code))
@@ -364,11 +366,13 @@ void proclist::kill(pid_t pid, int exit_code)
         init.waitlist.notify_all();
 
     if (1) {
-        lock_guard_irq lck(parent.mtx_waitprocs);
+        lock_guard lck(parent.mtx_waitprocs);
         parent.waitprocs.push_back({ pid, exit_code });
     }
 
     parent.waitlist.notify_all();
+
+    kernel::async::preempt_enable();
 }
 
 static void release_kinit()
@@ -522,11 +526,8 @@ extern "C" void after_ctx_switch()
     current_thread->load_thread_area32();
 }
 
-bool schedule()
+bool _schedule()
 {
-    if (kernel::async::preempt_count() != 0)
-        return true;
-
     auto* next_thd = kernel::task::dispatcher::next();
 
     if (current_thread != next_thd) {
@@ -545,9 +546,17 @@ bool schedule()
     return current_thread->signals.pending_signal() == 0;
 }
 
+bool schedule() // guarded entry point: only reaches _schedule() when the preempt count is zero
+{
+    if (kernel::async::preempt_count() != 0)
+        return true; // non-zero preempt count: skip the dispatcher entirely and report true
+
+    return _schedule(); // otherwise forward the result of the unguarded _schedule()
+}
+
 void NORETURN schedule_noreturn(void)
 {
-    schedule();
+    _schedule();
     freeze();
 }
 

+ 17 - 11
src/kernel/signal.cpp

@@ -1,7 +1,8 @@
-#include <kernel/task/thread.hpp>
+#include <kernel/async/lock.hpp>
+#include <kernel/interrupt.hpp>
 #include <kernel/process.hpp>
 #include <kernel/signal.hpp>
-#include <kernel/interrupt.hpp>
+#include <kernel/task/thread.hpp>
 
 #include <signal.h>
 
@@ -40,6 +41,7 @@ static void stop_process(int signal)
 
     // signal parent we're stopped
     parent.waitprocs.push_back({ current_process->pid, 0x7f });
+    parent.waitlist.notify_all();
 
     while (true) {
         if (schedule())
@@ -61,13 +63,10 @@ static void terminate_process_with_core_dump(int signo)
 
 void signal_list::set_handler(signo_type signal, const sigaction& action)
 {
-    if (action.sa_handler == SIG_DFL) {
+    if (action.sa_handler == SIG_DFL)
         m_handlers.erase(signal);
-        return;
-    }
-    else {
+    else
         m_handlers[signal] = action;
-    }
 }
 
 void signal_list::get_handler(signo_type signal, sigaction& action) const
@@ -93,6 +92,8 @@ void signal_list::on_exec()
 
 bool signal_list::raise(signo_type signal)
 {
+    async::lock_guard lck{m_mtx};
+
     // TODO: clear pending signals
     if (signal == SIGCONT) {
         m_list.remove_if([](signo_type sig) {
@@ -124,6 +125,7 @@ bool signal_list::raise(signo_type signal)
 
 signo_type signal_list::pending_signal()
 {
+    async::lock_guard lck{m_mtx};
     for (auto iter = m_list.begin(); iter != m_list.end(); ++iter) {
         auto iter_handler = m_handlers.find(*iter);
 
@@ -148,9 +150,13 @@ signo_type signal_list::pending_signal()
 
 void signal_list::handle(interrupt_stack_normal* context, mmx_registers* mmxregs)
 {
-    // assume that the pending signal is at the front of the list
-    auto signal = m_list.front();
-    m_list.pop_front();
+    unsigned int signal;
+    if (1) {
+        async::lock_guard lck{m_mtx};
+        // assume that the pending signal is at the front of the list
+        signal = m_list.front();
+        m_list.pop_front();
+    }
 
     // default handlers
     if (sigmask(signal) & sigmask_now) {
@@ -204,7 +210,7 @@ void signal_list::handle(interrupt_stack_normal* context, mmx_registers* mmxregs
 
 void signal_list::after_signal(signo_type signal)
 {
-    this->m_mask &= ~sigmask(signal);
+    m_mask &= ~sigmask(signal);
 }
 
 kernel::sigmask_type signal_list::get_mask() const { return m_mask; }

+ 18 - 4
src/kernel/syscall.cpp

@@ -100,7 +100,12 @@ static uint32_t _syscall32_##name(interrupt_stack_normal* data, mmx_registers* m
     _DEFINE_SYSCALL32_END(name, __VA_ARGS__); \
 }
 
-static uint32_t (*syscall_handlers[SYSCALL_HANDLERS_SIZE])(interrupt_stack_normal*, mmx_registers*);
+struct syscall_handler_t {
+    uint32_t (*handler)(interrupt_stack_normal*, mmx_registers*);
+    const char* name;
+};
+
+static syscall_handler_t syscall_handlers[SYSCALL_HANDLERS_SIZE];
 
 static inline void not_implemented(const char* pos, int line)
 {
@@ -169,6 +174,7 @@ DEFINE_SYSCALL32(arch_prctl, int, option, uintptr_t, arg2)
 DEFINE_SYSCALL32(brk, uintptr_t, addr)
 DEFINE_SYSCALL32(umask, mode_t, mask)
 DEFINE_SYSCALL32(kill, pid_t, pid, int, sig)
+DEFINE_SYSCALL32(tkill, pid_t, tid, int, sig)
 DEFINE_SYSCALL32(rt_sigprocmask, int, how,
         const kernel::sigmask_type __user*, set,
         kernel::sigmask_type __user*, oldset, size_t, sigsetsize)
@@ -191,6 +197,7 @@ static uint32_t _syscall32_fork(interrupt_stack_normal* data, mmx_registers* mmx
     assert(inserted);
     auto* newthd = &*iter_newthd;
 
+    kernel::async::preempt_disable();
     kernel::task::dispatcher::enqueue(newthd);
 
     auto newthd_prev_sp = newthd->kstack.sp;
@@ -239,6 +246,7 @@ static uint32_t _syscall32_fork(interrupt_stack_normal* data, mmx_registers* mmx
     newthd->kstack.pushq(0);              // 0 for alignment
     newthd->kstack.pushq(newthd_prev_sp); // previous sp
 
+    kernel::async::preempt_enable();
     return newproc.pid;
 }
 
@@ -360,7 +368,7 @@ static uint32_t _syscall32_wait4(interrupt_stack_normal* data, mmx_registers* mm
 
 void kernel::handle_syscall32(int no, interrupt_stack_normal* data, mmx_registers* mmxregs)
 {
-    if (no >= SYSCALL_HANDLERS_SIZE || !syscall_handlers[no]) {
+    if (no >= SYSCALL_HANDLERS_SIZE || !syscall_handlers[no].handler) {
         kmsgf("[kernel] syscall %d(%x) isn't implemented", no, no);
         NOT_IMPLEMENTED;
 
@@ -369,7 +377,10 @@ void kernel::handle_syscall32(int no, interrupt_stack_normal* data, mmx_register
         return;
     }
 
-    data->head.s_regs.rax = syscall_handlers[no](data, mmxregs);
+    // kmsgf_debug("[kernel:debug] (pid\t%d) %s()", current_process->pid, syscall_handlers[no].name);
+
+    asm volatile("sti");
+    data->head.s_regs.rax = syscall_handlers[no].handler(data, mmxregs);
     data->head.s_regs.r8 = 0;
     data->head.s_regs.r9 = 0;
     data->head.s_regs.r10 = 0;
@@ -383,7 +394,9 @@ void kernel::handle_syscall32(int no, interrupt_stack_normal* data, mmx_register
         current_thread->signals.handle(data, mmxregs);
 }
 
-#define REGISTER_SYSCALL_HANDLER(no, name) syscall_handlers[(no)] = _syscall32_ ## name
+#define REGISTER_SYSCALL_HANDLER(no, _name) \
+    syscall_handlers[(no)].handler = _syscall32_ ## _name; \
+    syscall_handlers[(no)].name = #_name; \
 
 SECTION(".text.kinit")
 void kernel::init_syscall_table()
@@ -441,6 +454,7 @@ void kernel::init_syscall_table()
     REGISTER_SYSCALL_HANDLER(0xdc, getdents64);
     REGISTER_SYSCALL_HANDLER(0xdd, fcntl64);
     REGISTER_SYSCALL_HANDLER(0xe0, gettid);
+    REGISTER_SYSCALL_HANDLER(0xee, tkill);
     REGISTER_SYSCALL_HANDLER(0xef, sendfile64);
     REGISTER_SYSCALL_HANDLER(0xf3, set_thread_area);
     REGISTER_SYSCALL_HANDLER(0xfc, exit_group);

+ 0 - 6
src/kernel/syscall/fileops.cc

@@ -358,12 +358,6 @@ ssize_t kernel::syscall::do_sendfile(int out_fd, int in_fd,
         if (ret < 0)
             return ret;
         totn += ret;
-
-        // TODO: this won't work, since when we are in the syscall handler,
-        //       interrupts are blocked.
-        //       one solution is to put the sendfile action into a kernel
-        //       worker and pause the calling thread so that the worker
-        //       thread could be interrupted normally.
     }
 
     return totn;

+ 32 - 2
src/kernel/syscall/procops.cc

@@ -7,6 +7,7 @@
 
 #include <types/elf.hpp>
 
+#include <kernel/async/lock.hpp>
 #include <kernel/log.hpp>
 #include <kernel/process.hpp>
 #include <kernel/signal.hpp>
@@ -60,11 +61,19 @@ execve_retval kernel::syscall::do_execve(
 
     current_process->files.onexec();
 
+    async::preempt_disable();
+
     // TODO: set cs and ss to compatibility mode
-    if (int ret = types::elf::elf32_load(d); ret != 0)
+    if (int ret = types::elf::elf32_load(d); ret != 0) {
+        async::preempt_enable();
+        if (ret == types::elf::ELF_LOAD_FAIL_NORETURN)
+            kill_current(SIGSEGV);
+
         return { 0, 0, ret };
+    }
 
     current_thread->signals.on_exec();
+    async::preempt_enable();
 
     return { d.ip, d.sp, 0 };
 }
@@ -88,7 +97,7 @@ int kernel::syscall::do_waitpid(pid_t waitpid, int __user* arg1, int options)
         return -EINVAL;
 
     auto& cv = current_process->waitlist;
-    kernel::async::lock_guard lck(current_process->mtx_waitprocs);
+    async::lock_guard lck(current_process->mtx_waitprocs);
 
     auto& waitlist = current_process->waitprocs;
 
@@ -278,6 +287,27 @@ int kernel::syscall::do_kill(pid_t pid, int sig)
     return 0;
 }
 
+int kernel::syscall::do_tkill(pid_t tid, int sig)
+{
+    NOT_IMPLEMENTED; // stub marker: this syscall is not implemented yet
+    return -EINVAL; // unconditional early return; every statement below is currently unreachable
+
+    auto [ pproc, found ] = procs->try_find(tid); // NOTE(review): tid is looked up through procs, i.e. apparently treated like a process id - confirm once real thread lookup exists
+    if (!found)
+        return -ESRCH; // no entry for this id
+
+    if (!kernel::signal_list::check_valid(sig))
+        return -EINVAL; // signal number rejected by signal_list::check_valid
+
+    if (pproc->is_system())
+        return 0; // targets reported as system by is_system() are skipped; success is still returned
+
+    // TODO: check permission
+    procs->send_signal(tid, sig); // NOTE(review): delivered via procs with tid as the id - presumably placeholder behaviour; confirm
+
+    return 0;
+}
+
 int kernel::syscall::do_rt_sigprocmask(int how, const sigmask_type __user* set,
         sigmask_type __user* oldset, size_t sigsetsize)
 {

+ 2 - 2
src/kernel/tty.cpp

@@ -57,7 +57,7 @@ void tty::print(const char* str)
 
 int tty::poll()
 {
-    kernel::async::lock_guard lck(this->mtx_buf);
+    async::lock_guard_irq lck(this->mtx_buf);
     if (this->buf.empty()) {
         bool interrupted = this->waitlist.wait(this->mtx_buf);
 
@@ -77,7 +77,7 @@ ssize_t tty::read(char* buf, size_t buf_size, size_t n)
         if (n == 0)
             break;
 
-        kernel::async::lock_guard lck(this->mtx_buf);
+        async::lock_guard_irq lck(this->mtx_buf);
 
         if (this->buf.empty()) {
             bool interrupted = this->waitlist.wait(this->mtx_buf);

+ 54 - 47
src/kernel/vfs.cpp

@@ -803,36 +803,32 @@ fs::pipe::pipe(void)
 
 void fs::pipe::close_read(void)
 {
-    if (1) {
-        kernel::async::lock_guard lck(mtx);
-        flags &= (~READABLE);
-    }
-    waitlist.notify_all();
+    kernel::async::lock_guard lck{mtx};
+    flags &= (~READABLE);
+    waitlist_w.notify_all();
 }
 
 void fs::pipe::close_write(void)
 {
-    if (1) {
-        kernel::async::lock_guard lck(mtx);
-        flags &= (~WRITABLE);
-    }
-    waitlist.notify_all();
+    kernel::async::lock_guard lck{mtx};
+    flags &= (~WRITABLE);
+    waitlist_r.notify_all();
 }
 
 int fs::pipe::write(const char* buf, size_t n)
 {
     // TODO: check privilege
     // TODO: check EPIPE
-    if (1) {
-        kernel::async::lock_guard lck(mtx);
+    kernel::async::lock_guard lck{mtx};
 
-        if (!is_readable()) {
-            current_thread->send_signal(SIGPIPE);
-            return -EPIPE;
-        }
+    if (!is_readable()) {
+        current_thread->send_signal(SIGPIPE);
+        return -EPIPE;
+    }
 
+    if (n <= PIPE_SIZE) {
         while (this->buf.avail() < n) {
-            bool interrupted = waitlist.wait(mtx);
+            bool interrupted = waitlist_w.wait(mtx);
             if (interrupted)
                 return -EINTR;
 
@@ -844,50 +840,61 @@ int fs::pipe::write(const char* buf, size_t n)
 
         for (size_t i = 0; i < n; ++i)
             this->buf.put(*(buf++));
+
+        waitlist_r.notify_all();
+
+        return n;
     }
 
-    waitlist.notify_all();
-    return n;
+    size_t orig_n = n;
+    while (true) {
+        bool write = false;
+        while (n && !this->buf.full()) {
+            --n, this->buf.put(*(buf++));
+            write = true;
+        }
+
+        if (write)
+            waitlist_r.notify_all();
+
+        if (n == 0)
+            break;
+
+        bool interrupted = waitlist_w.wait(mtx);
+        if (interrupted)
+            return -EINTR;
+
+        if (!is_readable()) {
+            current_thread->send_signal(SIGPIPE);
+            return -EPIPE;
+        }
+    }
+
+    return orig_n - n;
 }
 
 int fs::pipe::read(char* buf, size_t n)
 {
     // TODO: check privilege
-    if (1) {
-        kernel::async::lock_guard lck(mtx);
-
-        if (!is_writeable()) {
-            size_t orig_n = n;
-            while (!this->buf.empty() && n) {
-                --n;
-                *(buf++) = this->buf.get();
-            }
-
-            return orig_n - n;
-        }
+    kernel::async::lock_guard lck{mtx}; // mtx is held for the whole call, including the final notify
+    size_t orig_n = n; // requested byte count, used to compute the number of bytes actually copied
 
-        while (this->buf.size() < n) {
-            bool interrupted = waitlist.wait(mtx);
+    if (n <= PIPE_SIZE || this->buf.empty()) { // wait only for requests of at most PIPE_SIZE, or when nothing is buffered
+        while (is_writeable() && this->buf.size() < n) { // NOTE(review): waits for n buffered bytes while write() waits for n free bytes (avail() < n); check that both sides cannot end up waiting on each other
+            bool interrupted = waitlist_r.wait(mtx); // wait on the reader-side list, passing mtx
             if (interrupted)
                 return -EINTR;
 
-            if (!is_writeable()) {
-                size_t orig_n = n;
-                while (!this->buf.empty() && n) {
-                    --n;
-                    *(buf++) = this->buf.get();
-                }
-
-                return orig_n - n;
-            }
+            if (n > PIPE_SIZE) // oversized request: a single wait is enough, then drain whatever is buffered
+                break;
         }
-
-        for (size_t i = 0; i < n; ++i)
-            *(buf++) = this->buf.get();
     }
 
-    waitlist.notify_all();
-    return n;
+    while (!this->buf.empty() && n) // copy out up to n bytes, stopping early if the buffer runs dry
+        --n, *(buf++) = this->buf.get();
+
+    waitlist_w.notify_all(); // wake everything waiting on the writer-side list
+    return orig_n - n; // number of bytes copied; may be less than requested
 }
 
 SECTION(".text.kinit")

+ 3 - 4
src/types/elf.cpp

@@ -107,7 +107,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data& d)
                 args.flags |= MM_EXECUTE;
 
             if (auto ret = mms.mmap(args); ret != 0)
-                kill_current(SIGSEGV);
+                return ELF_LOAD_FAIL_NORETURN;
         }
 
         if (vlen > flen) {
@@ -124,7 +124,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data& d)
                 args.flags |= MM_EXECUTE;
 
             if (auto ret = mms.mmap(args); ret != 0)
-                kill_current(SIGSEGV);
+                return ELF_LOAD_FAIL_NORETURN;
         }
 
         if (vaddr + vlen > data_segment_end)
@@ -148,8 +148,7 @@ int types::elf::elf32_load(types::elf::elf32_load_data& d)
         args.flags = MM_ANONYMOUS | MM_WRITE;
 
         if (auto ret = mms.mmap(args); ret != 0)
-            kill_current(SIGSEGV);
-        // TODO: deconstruct local variables before calling kill_current
+            return ELF_LOAD_FAIL_NORETURN;
     }
 
     d.ip = hdr.entry;