greatbridf 9 months ago
parent
commit
73ab9a9320

+ 3 - 1
CMakeLists.txt

@@ -43,9 +43,11 @@ set(KERNEL_MAIN_SOURCES src/fs/fat.cpp
                         src/kernel/interrupt.cpp
                         src/kernel/process.cpp
                         src/kernel/tty.cpp
-                        # src/kernel/syscall.cpp
+                        src/kernel/syscall.cpp
                         src/kernel/syscall/fileops.cc
+                        src/kernel/syscall/infoops.cc
                         src/kernel/syscall/mount.cc
+                        src/kernel/syscall/procops.cc
                         src/kernel/mem/mm_list.cc
                         src/kernel/mem/paging.cc
                         src/kernel/mem/slab.cc

+ 1 - 1
Makefile.src

@@ -42,7 +42,7 @@ clean-all: clean
 
 .PHONY: debug
 debug:
-	$(GDB_BIN) --symbols=build/kernel.out --init-eval-command 'source pretty-print.py' --init-eval-command 'set pagination off' --init-eval-command 'target remote:1234' --init-eval-command 'layout regs' --eval-command 'hbr _kernel_init' --eval-command 'c'
+	-$(GDB_BIN) --symbols=build/kernel.out --init-eval-command 'source pretty-print.py' --init-eval-command 'set pagination off' --init-eval-command 'target remote:1234' --init-eval-command 'layout regs' --eval-command 'hbr _kernel_init' --eval-command 'c'
 	-killall $(QEMU_BIN)
 
 build/boot.vdi: build/boot.img

+ 0 - 1
gblibc/include/bits/alltypes.h

@@ -13,7 +13,6 @@ typedef size_t blkcnt_t;
 struct timespec {
     time_t tv_sec;
     long tv_nsec;
-    int : 32; // padding
 };
 
 struct timeval {

+ 6 - 6
gblibc/include/sys/types.h

@@ -8,16 +8,16 @@ extern "C" {
 #endif
 
 typedef int pid_t;
-typedef uint32_t ino_t;
-typedef int32_t off_t;
-typedef uint32_t dev_t;
+typedef unsigned long ino_t;
+typedef long off_t;
+typedef unsigned dev_t;
 typedef unsigned uid_t;
 typedef unsigned gid_t;
-typedef unsigned mode_t;
+typedef unsigned short mode_t;
 typedef unsigned long nlink_t;
 
-typedef uint64_t ino64_t;
-typedef int64_t off64_t;
+typedef unsigned long long ino64_t;
+typedef long long off64_t;
 
 typedef off64_t loff_t;
 

+ 2 - 2
gblibstdc++/include/string

@@ -493,12 +493,12 @@ public:
 
     constexpr int compare(const basic_string& str) const noexcept
     {
-        return traits_type::compare(c_str(), str.c_str(), size());
+        return traits_type::compare(c_str(), str.c_str(), size()+1);
     }
 
     constexpr int compare(const Char* str) const
     {
-        return traits_type::compare(c_str(), str, size());
+        return traits_type::compare(c_str(), str, size()+1);
     }
 };
 

+ 1 - 1
include/kernel/interrupt.hpp

@@ -9,8 +9,8 @@ struct saved_regs {
     unsigned long rbx;
     unsigned long rcx;
     unsigned long rdx;
-    unsigned long rsi;
     unsigned long rdi;
+    unsigned long rsi;
     unsigned long r8;
     unsigned long r9;
     unsigned long r10;

+ 1 - 1
include/kernel/mem/mm_list.hpp

@@ -11,7 +11,7 @@ namespace kernel::mem {
 
 constexpr uintptr_t KERNEL_SPACE_START    = 0x8000000000000000ULL;
 constexpr uintptr_t USER_SPACE_MEMORY_TOP = 0x0000800000000000ULL;
-constexpr uintptr_t MMAP_MIN_ADDR         = 0x0000600000000000ULL;
+constexpr uintptr_t MMAP_MIN_ADDR         = 0x0000000000001000ULL;
 constexpr uintptr_t STACK_MIN_ADDR        = 0x0000700000000000ULL;
 
 class mm_list {

+ 3 - 3
include/kernel/mem/phys.hpp

@@ -52,13 +52,13 @@ public:
 //  gdt[4]:  user data
 //  gdt[5]:  user code compability mode
 //  gdt[6]:  user data compability mode
-//  gdt[7]:  reserved
+//  gdt[7]:  thread local 32bit
 //  gdt[8]:  tss descriptor low
 //  gdt[9]:  tss descriptor high
 //  gdt[10]: ldt descriptor low
 //  gdt[11]: ldt descriptor high
-//  gdt[12]: thread local(in ldt)
-//  gdt[13]: thread local(in ldt)
+//  gdt[12]: null segment(in ldt)
+//  gdt[13]: thread local 64bit(in ldt)
 // &gdt[14]: tss of 0x68 bytes from here
 constexpr physaddr<uint64_t> gdt{0x00000000 + 1 - 1};
 

+ 106 - 9
include/kernel/syscall.hpp

@@ -1,17 +1,114 @@
 #pragma once
 
+#include <bits/alltypes.h>
+#include <poll.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <time.h>
+
 #include <types/types.h>
 
 #include <kernel/interrupt.hpp>
+#include <kernel/signal.hpp>
+#include <kernel/user/thread_local.hpp>
+
+#define SYSCALL64_ARG1(type, name) type name = (type)((data)->head.s_regs.rdi)
+#define SYSCALL64_ARG2(type, name) type name = (type)((data)->head.s_regs.rsi)
+#define SYSCALL64_ARG3(type, name) type name = (type)((data)->head.s_regs.rdx)
+#define SYSCALL64_ARG4(type, name) type name = (type)((data)->head.s_regs.r10)
+#define SYSCALL64_ARG5(type, name) type name = (type)((data)->head.s_regs.r8)
+#define SYSCALL64_ARG6(type, name) type name = (type)((data)->head.s_regs.r9)
+
+namespace kernel {
+void init_syscall_table();
+
+void handle_syscall32(int no, interrupt_stack_normal* data, mmx_registers* mmxregs);
+void handle_syscall64(int no, interrupt_stack_normal* data, mmx_registers* mmxregs);
+
+namespace syscall {
+// in fileops.cc
+ssize_t do_write(int fd, const char __user* buf, size_t n);
+ssize_t do_read(int fd, char __user* buf, size_t n);
+int do_close(int fd);
+int do_dup(int old_fd);
+int do_dup2(int old_fd, int new_fd);
+int do_pipe(int __user* pipefd);
+ssize_t do_getdents(int fd, char __user* buf, size_t cnt);
+ssize_t do_getdents64(int fd, char __user* buf, size_t cnt);
+int do_open(const char __user* path, int flags, mode_t mode);
+int do_symlink(const char __user* target, const char __user* linkpath);
+int do_readlink(const char __user* pathname, char __user* buf, size_t buf_size);
+int do_ioctl(int fd, unsigned long request, uintptr_t arg3);
+ssize_t do_readv(int fd, const iovec __user* iov, int iovcnt);
+ssize_t do_writev(int fd, const iovec __user* iov, int iovcnt);
+off_t do_lseek(int fd, off_t offset, int whence);
+uintptr_t do_mmap_pgoff(uintptr_t addr, size_t len,
+        int prot, int flags, int fd, off_t pgoffset);
+int do_munmap(uintptr_t addr, size_t len);
+ssize_t do_sendfile(int out_fd, int in_fd, off_t __user* offset, size_t count);
+int do_statx(int dirfd, const char __user* path,
+        int flags, unsigned int mask, statx __user* statxbuf);
+int do_fcntl(int fd, int cmd, unsigned long arg);
+int do_poll(pollfd __user* fds, nfds_t nfds, int timeout);
+int do_mknod(const char __user* pathname, mode_t mode, dev_t dev);
+int do_access(const char __user* pathname, int mode);
+int do_unlink(const char __user* pathname);
+int do_truncate(const char __user* pathname, long length);
+int do_mkdir(const char __user* pathname, mode_t mode);
+
+// in procops.cc
+int do_chdir(const char __user* path);
+[[noreturn]] int do_exit(int status);
+int do_waitpid(pid_t waitpid, int __user* arg1, int options);
+pid_t do_getsid(pid_t pid);
+pid_t do_setsid();
+pid_t do_getpgid(pid_t pid);
+int do_setpgid(pid_t pid, pid_t pgid);
+int do_set_thread_area(user::user_desc __user* ptr);
+pid_t do_set_tid_address(int __user* tidptr);
+int do_prctl(int option, uintptr_t arg2);
+int do_arch_prctl(int option, uintptr_t arg2);
+pid_t do_getpid();
+pid_t do_getppid();
+uid_t do_getuid();
+uid_t do_geteuid();
+gid_t do_getgid();
+pid_t do_gettid();
+char __user* do_getcwd(char __user* buf, size_t buf_size);
+uintptr_t do_brk(uintptr_t addr);
+int do_umask(mode_t mask);
+int do_kill(pid_t pid, int sig);
+int do_rt_sigprocmask(int how, const kernel::sigmask_type __user* set,
+        kernel::sigmask_type __user* oldset, size_t sigsetsize);
+int do_rt_sigaction(int signum, const sigaction __user* act,
+        sigaction __user* oldact, size_t sigsetsize);
+int do_newuname(new_utsname __user* buf);
+
+struct execve_retval {
+    uintptr_t ip;
+    uintptr_t sp;
+    int status;
+};
+
+execve_retval do_execve(
+        const char __user* exec,
+        char __user* const __user* argv,
+        char __user* const __user* envp);
+
+// in mount.cc
+int do_mount(
+        const char __user* source,
+        const char __user* target,
+        const char __user* fstype,
+        unsigned long flags,
+        const void __user* _fsdata);
 
-#define SYSCALL_ARG1(type, name) type name = (type)((data)->head.s_regs.rdi)
-#define SYSCALL_ARG2(type, name) type name = (type)((data)->head.s_regs.rsi)
-#define SYSCALL_ARG3(type, name) type name = (type)((data)->head.s_regs.rdx)
-#define SYSCALL_ARG4(type, name) type name = (type)((data)->head.s_regs.r10)
-#define SYSCALL_ARG5(type, name) type name = (type)((data)->head.s_regs.r8)
-#define SYSCALL_ARG6(type, name) type name = (type)((data)->head.s_regs.r9)
+// in infoops.cc
+int do_clock_gettime(clockid_t clk_id, timespec __user* tp);
+int do_gettimeofday(timeval __user* tv, void __user* tz);
 
-// return value is stored in %rax
-typedef long (*syscall_handler)(interrupt_stack_normal* data);
+} // namespace kernel::syscall
 
-void init_syscall(void);
+} // namespace kernel

+ 2 - 2
include/kernel/task/thread.hpp

@@ -53,13 +53,13 @@ public:
 
     std::string name {};
 
-    uint64_t tls_desc[2] {};
+    uint64_t tls_desc32;
 
     explicit thread(std::string name, pid_t owner);
     thread(const thread& val, pid_t owner);
 
     int set_thread_area(user::user_desc* ptr);
-    int load_thread_area() const;
+    int load_thread_area32() const;
 
     void set_attr(thd_attr_t new_attr);
 

+ 0 - 3
include/kernel/user/thread_local.hpp

@@ -17,8 +17,5 @@ struct user_desc {
 };
 
 void load_thread_area32(uint64_t desc);
-void load_thread_area64(uint64_t desc_lo, uint64_t desc_hi);
-
-void load_thread_area(uint64_t desc_lo, uint64_t desc_hi);
 
 } // namespace kernel::user

+ 7 - 22
pretty-print.py

@@ -94,31 +94,16 @@ class stringPrinter:
         self.val = val
 
     def to_string(self):
-        return self.val['m_data']
+        if self.val['m_data']['stackdata']['end'] == 0:
+            return self.val['m_data']['stackdata']['str'].string()
+        return self.val['m_data']['heapdata']['m_ptr'].string()
+
+    def num_children(self):
+        return 0
 
     def display_hint(self):
         return 'string'
 
-    def children(self):
-        return
-
-        yield 'str', self.val['m_data']
-
-        if self.val['m_data'] == 0:
-            return
-
-        yield 'length', self.val['m_size'] - 1
-
-        ptr = self.val['m_data']
-        i = 0
-
-        while ptr.dereference() != 0:
-            yield '[%d]' % i, ptr.dereference()
-            ptr += 1
-            i += 1
-
-        yield '[%d]' % i, 0
-
 class listPrinter:
     def __init__(self, val):
         self.val: gdb.Field = val
@@ -292,7 +277,7 @@ def build_pretty_printer(val):
     if re.compile(r"^std::impl::rbtree<.*, .*, .*>::_iterator<.*?>$").match(typename):
         return rbtreeIteratorPrinter(val)
 
-    if re.compile(r"^types::string<.*>$").match(typename):
+    if re.compile(r"^std::basic_string<.*>$").match(typename):
         return stringPrinter(val)
 
     return None

+ 33 - 59
src/asm/interrupt.s

@@ -1,5 +1,8 @@
 .text
 
+.extern after_ctx_switch
+.globl ISR_stub_restore
+
 ISR_stub:
 	sub $0x78, %rsp
 	mov %rax,  0x00(%rsp)
@@ -33,6 +36,7 @@ ISR_stub:
 	mov %rsp, %rsi
 	call interrupt_handler
 
+ISR_stub_restore:
 	fxrstor (%rsp)
 	mov %rbx, %rsp
 
@@ -55,75 +59,45 @@ ISR_stub:
 	mov 0x78(%rsp), %rsp
 	iretq
 
-.globl syscall_stub
-.type  syscall_stub @function
-syscall_stub:
-    # pushal
-
-    # save current esp
-    mov %esp, %ebx
-
-    # stack alignment
-    and $0xfffffff0, %esp
-
-    # save mmx registers
-    sub $(512 + 16), %esp
-    fxsave 16(%esp)
-
-    # save pointers to context and mmx registers
-    mov %ebx, (%esp) # pointer to context
-    lea 16(%esp), %eax
-    mov %eax, 4(%esp) # pointer to mmx registers
-
-    # TODO: LONG MODE
-    # call syscall_entry
-
-    # restore mmx registers
-    fxrstor 16(%esp)
-
-    # restore stack
-    mov %ebx, %esp
-
-.globl _syscall_stub_fork_return
-.type  _syscall_stub_fork_return @function
-_syscall_stub_fork_return:
-    # popal
-    iretq
-
 # parameters
-# #1: esp* curr_esp
-# #2: esp* next_esp
+# #1: sp* current_task_sp
+# #2: sp* target_task_sp
 .globl asm_ctx_switch
 .type  asm_ctx_switch @function
 asm_ctx_switch:
-    movl 4(%esp), %ecx
-    movl 8(%esp), %eax
-
-    pushq $_ctx_switch_return
-    push %rbx
-    push %rdi
-    push %rsi
-    push %rbp
     pushf
+	sub $0x38, %rsp  # extra 8 bytes to align to 16 bytes
 
-    # push esp to restore
-    push (%rcx)
+    mov %rbx, 0x08(%rsp)
+	mov %rbp, 0x10(%rsp)
+	mov %r12, 0x18(%rsp)
+	mov %r13, 0x20(%rsp)
+	mov %r14, 0x28(%rsp)
+	mov %r15, 0x30(%rsp)
 
-    mov %esp, (%rcx)
-    mov (%rax), %esp
+    push (%rdi) 	 # save sp of previous stack frame of current
+	                 # acts as saving bp
 
-    # restore esp
-    pop (%rax)
+    mov %rsp, (%rdi) # save sp of current stack
+    mov (%rsi), %rsp # load sp of target stack
 
-    popf
-    pop %rbp
-    pop %rsi
-    pop %rdi
-    pop %rbx
+    pop (%rsi)       # load sp of previous stack frame of target
+	                 # acts as restoring previous bp
 
-    ret
+	pop %rax         # align to 16 bytes
+
+	call after_ctx_switch
+
+	mov %r15, 0x28(%rsp)
+	mov %r14, 0x20(%rsp)
+	mov %r13, 0x18(%rsp)
+	mov %r12, 0x10(%rsp)
+	mov %rbp, 0x08(%rsp)
+    mov %rbx, 0x00(%rsp)
+
+	add $0x30, %rsp
+    popf
 
-_ctx_switch_return:
     ret
 
 .altmacro
@@ -134,7 +108,7 @@ _ctx_switch_return:
 .endm
 
 .set i, 0
-.rept 48
+.rept 0x80+1
 	build_isr %i
 	.set i, i+1
 .endr

+ 1 - 1
src/boot.s

@@ -98,7 +98,7 @@ _fill_loop3:
     # set msr
     mov $0xc0000080, %ecx
     rdmsr
-    or $0x900, %eax # set LME, NXE
+    or $0x901, %eax # set LME, NXE, SCE
     wrmsr
 
     # set cr4

+ 10 - 8
src/kernel/interrupt.cpp

@@ -14,6 +14,7 @@
 #include <kernel/log.hpp>
 #include <kernel/mem/paging.hpp>
 #include <kernel/process.hpp>
+#include <kernel/syscall.hpp>
 #include <kernel/vfs.hpp>
 
 #define KERNEL_INTERRUPT_GATE_TYPE (0x8e)
@@ -61,6 +62,7 @@ void kernel::kinit::init_interrupt()
 {
     for (int i = 0; i < 0x30; ++i)
         set_idt_entry(IDT, i, ISR_START_ADDR+8*i, 0x08, KERNEL_INTERRUPT_GATE_TYPE);
+    set_idt_entry(IDT, 0x80, ISR_START_ADDR+8*0x80, 0x08, USER_INTERRUPT_GATE_TYPE);
 
     uint64_t idt_descriptor[2];
     idt_descriptor[0] = (sizeof(IDT_entry) * 256) << 48;
@@ -98,7 +100,7 @@ void kernel::irq::register_handler(int irqno, irq_handler_t handler)
 
 static inline void fault_handler(
         interrupt_stack_with_code* context,
-        mmx_registers* mmxregs)
+        mmx_registers*)
 {
     switch (context->head.int_no) {
     case 6:
@@ -123,7 +125,7 @@ static inline void fault_handler(
 
 static inline void irq_handler(
         interrupt_stack_normal* context,
-        mmx_registers* mmxregs)
+        mmx_registers*)
 {
     int irqno = context->head.int_no - 0x20;
 
@@ -135,12 +137,6 @@ static inline void irq_handler(
 
     for (const auto& handler : s_irq_handlers[irqno])
         handler();
-
-    // syscall by int 0x80
-    if (context->cs == 0x1b && current_thread->signals.pending_signal())
-        current_thread->signals.handle(context, mmxregs);
-
-    context->head.int_no = (unsigned long)context + 0x80;
 }
 
 extern "C" void interrupt_handler(
@@ -152,8 +148,14 @@ extern "C" void interrupt_handler(
         auto* with_code = (interrupt_stack_with_code*)context;
         fault_handler(with_code, mmxregs);
     }
+    else if (context->int_no == 0x80) { // syscall by int 0x80
+        auto* normal = (interrupt_stack_normal*)context;
+        kernel::handle_syscall32(context->s_regs.rax, normal, mmxregs);
+        context->int_no = (unsigned long)context + 0x80;
+    }
     else {
         auto* normal = (interrupt_stack_normal*)context;
         irq_handler(normal, mmxregs);
+        context->int_no = (unsigned long)context + 0x80;
     }
 }

+ 14 - 10
src/kernel/mem/mm_list.cc

@@ -48,7 +48,12 @@ mm_list::mm_list(const mm_list& other): mm_list{}
     m_areas = other.m_areas;
 
     using namespace paging;
-    for (const auto& area : m_areas) {
+    for (auto iter = m_areas.begin(); iter != m_areas.end(); ++iter) {
+        auto& area = *iter;
+
+        if (area.flags & MM_BREAK)
+            m_brk = iter;
+
         auto this_iter = vaddr_range{m_pt, area.start, area.end};
         auto other_iter = vaddr_range{other.m_pt, area.start, area.end};
 
@@ -104,11 +109,7 @@ bool mm_list::is_avail(uintptr_t addr) const
 
 uintptr_t mm_list::find_avail(uintptr_t hint, size_t len) const
 {
-    auto addr = hint;
-
-    // use default value of mmapp'ed area
-    if (!addr)
-        addr = MMAP_MIN_ADDR;
+    auto addr = std::max(hint, MMAP_MIN_ADDR);
 
     while (!is_avail(addr, len)) {
         auto iter = m_areas.lower_bound(addr);
@@ -128,6 +129,7 @@ void mm_list::switch_pd() const noexcept
 
 int mm_list::register_brk(uintptr_t addr)
 {
+    assert(m_brk == m_areas.end());
     if (!is_avail(addr))
         return -ENOMEM;
 
@@ -155,7 +157,7 @@ uintptr_t mm_list::set_brk(uintptr_t addr)
         pte.set(PA_ANONYMOUS_PAGE | PA_NXE, EMPTY_PAGE_PFN);
 
     m_brk->end = addr;
-    return curbrk;
+    return m_brk->end;
 }
 
 void mm_list::clear()
@@ -164,6 +166,7 @@ void mm_list::clear()
         unmap(iter);
 
     m_areas.clear();
+    m_brk = m_areas.end();
 }
 
 mm_list::iterator mm_list::split(iterator area, uintptr_t addr)
@@ -177,12 +180,13 @@ mm_list::iterator mm_list::split(iterator area, uintptr_t addr)
     if (area->mapped_file)
         new_file_offset = area->file_offset + old_len;
 
+    auto new_end = area->end;
+    area->end = addr;
+
     auto [ iter, inserted ] =
-        m_areas.emplace(addr, area->flags, area->end,
+        m_areas.emplace(addr, area->flags, new_end,
                 area->mapped_file, new_file_offset);
 
-    area->end = addr;
-
     assert(inserted);
     return iter;
 }

+ 11 - 2
src/kernel/mem/paging.cc

@@ -262,8 +262,17 @@ void kernel::mem::paging::handle_page_fault(unsigned long err)
         __page_fault_die(vaddr);
     }
 
-    if (err & PAGE_FAULT_U && err & PAGE_FAULT_P)
-        kill_current(SIGSEGV);
+    // user access to a present page caused the fault
+    // check access rights
+    if (err & PAGE_FAULT_U && err & PAGE_FAULT_P) {
+        // write to read only pages
+        if (err & PAGE_FAULT_W && !(mm_area->flags & MM_WRITE))
+            kill_current(SIGSEGV);
+
+        // execute from non-executable pages
+        if (err & PAGE_FAULT_I && !(mm_area->flags & MM_EXECUTE))
+            kill_current(SIGSEGV);
+    }
 
     auto idx = idx_all(vaddr);
 

+ 59 - 70
src/kernel/process.cpp

@@ -229,7 +229,7 @@ void process::send_signal(signo_type signal)
 
 void kernel_threadd_main(void)
 {
-    kmsg("kernel thread daemon started");
+    kmsg("[kernel] kthread daemon started");
 
     for (;;) {
         if (kthreadd_new_thd_func) {
@@ -248,22 +248,29 @@ void kernel_threadd_main(void)
             // TODO
             (void)func, (void)data;
             assert(false);
-
-            // syscall_fork
-            // int ret = syscall(0x00);
-
-            // if (ret == 0) {
-            //     // child process
-            //     func(data);
-            //     // the function shouldn't return here
-            //     assert(false);
-            // }
         }
         // TODO: sleep here to wait for new_kernel_thread event
         asm volatile("hlt");
     }
 }
 
+static inline void __spawn(kernel::task::thread& thd, uintptr_t entry)
+{
+    auto prev_sp = thd.kstack.sp;
+
+    // return(start) address
+    thd.kstack.pushq(entry);
+    thd.kstack.pushq(0x200);       // flags
+    thd.kstack.pushq(0);           // 0 for alignment
+    thd.kstack.pushq(0);           // rbx
+    thd.kstack.pushq(0);           // rbp
+    thd.kstack.pushq(0);           // r12
+    thd.kstack.pushq(0);           // r13
+    thd.kstack.pushq(0);           // r14
+    thd.kstack.pushq(0);           // r15
+    thd.kstack.pushq(prev_sp);     // previous sp
+}
+
 SECTION(".text.kinit")
 proclist::proclist()
 {
@@ -271,8 +278,8 @@ proclist::proclist()
     auto& init = real_emplace(1, 0);
     assert(init.pid == 1 && init.ppid == 0);
 
-    auto& thd = *init.thds.begin();
-    thd.name.assign("[kernel init]");
+    auto thd = init.thds.begin();
+    thd->name.assign("[kernel init]");
 
     current_process = &init;
     current_thread = &thd;
@@ -288,29 +295,12 @@ proclist::proclist()
         assert(proc.pid == 0 && proc.ppid == 0);
 
         // create thread
-        auto& thd = *proc.thds.begin();
-        thd.name.assign("[kernel thread daemon]");
-
-        // TODO: LONG MODE
-        // auto* esp = &thd.kstack.esp;
-        // auto old_esp = (uint32_t)thd.kstack.esp;
-
-        // // return(start) address
-        // push_stack(esp, (uint32_t)kernel_threadd_main);
-        // // ebx
-        // push_stack(esp, 0);
-        // // edi
-        // push_stack(esp, 0);
-        // // esi
-        // push_stack(esp, 0);
-        // // ebp
-        // push_stack(esp, 0);
-        // // eflags
-        // push_stack(esp, 0x200);
-        // // original esp
-        // push_stack(esp, old_esp);
-
-        // kernel::task::dispatcher::enqueue(&thd);
+        auto thd = proc.thds.begin();
+        thd->name.assign("[kernel thread daemon]");
+
+        __spawn(*thd, (uintptr_t)kernel_threadd_main);
+
+        kernel::task::dispatcher::enqueue(&thd);
     }
 }
 
@@ -326,6 +316,12 @@ void proclist::kill(pid_t pid, int exit_code)
 {
     auto& proc = this->find(pid);
 
+    // init should never exit
+    if (proc.ppid == 0) {
+        kmsg("kernel panic: init exited!");
+        freeze();
+    }
+
     // put all threads into sleep
     for (auto& thd : proc.thds)
         thd.set_attr(kernel::task::thread::ZOMBIE);
@@ -336,12 +332,6 @@ void proclist::kill(pid_t pid, int exit_code)
     // unmap all user memory areas
     proc.mms.clear();
 
-    // init should never exit
-    if (proc.ppid == 0) {
-        kmsg("kernel panic: init exited!");
-        freeze();
-    }
-
     // make child processes orphans (children of init)
     this->make_children_orphans(pid);
 
@@ -456,21 +446,24 @@ void NORETURN _kernel_init(kernel::mem::paging::pfn_t kernel_stack_pfn)
     int ret = types::elf::elf32_load(d);
     assert(ret == 0);
 
+    int ds = 0x33, cs = 0x2b;
+
     asm volatile(
-        "mov $0x23, %%ax\n"
+        "mov %0, %%rax\n"
         "mov %%ax, %%ds\n"
         "mov %%ax, %%es\n"
         "mov %%ax, %%fs\n"
         "mov %%ax, %%gs\n"
 
-        "push $0x23\n"
-        "push %0\n"
+        "push %%rax\n"
+        "push %2\n"
         "push $0x200\n"
-        "push $0x1b\n"
         "push %1\n"
+        "push %3\n"
 
         "iretq\n"
-        : : "g"(d.sp), "g"(d.ip) : "eax", "memory");
+        : : "g"(ds), "g"(cs), "g"(d.sp),
+            "g"(d.ip) : "eax", "memory");
 
     freeze();
 }
@@ -521,38 +514,34 @@ void NORETURN init_scheduler(kernel::mem::paging::pfn_t kernel_stack_pfn)
     freeze();
 }
 
-extern "C" void asm_ctx_switch(uint32_t** curr_esp, uint32_t** next_esp);
+extern "C" void asm_ctx_switch(uintptr_t* curr_sp, uintptr_t* next_sp);
+
+extern "C" void after_ctx_switch()
+{
+    current_thread->kstack.load_interrupt_stack();
+    current_thread->load_thread_area32();
+}
+
 bool schedule()
 {
     if (kernel::async::preempt_count() != 0)
         return true;
+    return true;
 
     auto* next_thd = kernel::task::dispatcher::next();
-    process* proc = nullptr;
-    kernel::task::thread* curr_thd = nullptr;
 
-    if (current_thread == next_thd)
-        goto _end;
-
-    proc = &procs->find(next_thd->owner);
-    if (current_process != proc) {
-        proc->mms.switch_pd();
-        current_process = proc;
-    }
-
-    curr_thd = current_thread;
-
-    freeze();
-    // TODO: LONG MODE
-    // current_thread = next_thd;
-    // tss.esp0 = (uint32_t)next_thd->kstack.esp;
-
-    // next_thd->load_thread_area();
+    if (current_thread != next_thd) {
+        auto* proc = &procs->find(next_thd->owner);
+        if (current_process != proc) {
+            proc->mms.switch_pd();
+            current_process = proc;
+        }
 
-    // asm_ctx_switch(&curr_thd->kstack.esp, &next_thd->kstack.esp);
-    // tss.esp0 = (uint32_t)curr_thd->kstack.esp;
+        auto* curr_thd = current_thread;
+        current_thread = next_thd;
 
-_end:
+        asm_ctx_switch(&curr_thd->kstack.sp, &next_thd->kstack.sp);
+    }
 
     return current_thread->signals.pending_signal() == 0;
 }

File diff suppressed because it is too large
+ 211 - 1086
src/kernel/syscall.cpp


+ 538 - 8
src/kernel/syscall/fileops.cc

@@ -1,16 +1,93 @@
+#include <bits/ioctl.h>
 #include <errno.h>
+#include <poll.h>
+#include <sys/mman.h>
+#include <unistd.h>
 
 #include <types/path.hpp>
 
+#include <kernel/log.hpp>
+#include <kernel/mem/vm_area.hpp>
 #include <kernel/process.hpp>
 #include <kernel/syscall.hpp>
 #include <kernel/vfs.hpp>
 
-long _syscall_symlink(interrupt_stack_normal* data)
+#define NOT_IMPLEMENTED not_implemented(__FILE__, __LINE__)
+
+static inline void not_implemented(const char* pos, int line)
+{
+    kmsgf("[kernel] the function at %s:%d is not implemented, killing the pid%d...",
+            pos, line, current_process->pid);
+    current_thread->send_signal(SIGSYS);
+}
+
+ssize_t kernel::syscall::do_write(int fd, const char __user* buf, size_t n)
+{
+    auto* file = current_process->files[fd];
+    if (!file)
+        return -EBADF;
+
+    return file->write(buf, n);
+}
+
+ssize_t kernel::syscall::do_read(int fd, char __user* buf, size_t n)
 {
-    SYSCALL_ARG1(const char __user*, target);
-    SYSCALL_ARG2(const char __user*, linkpath);
+    auto* file = current_process->files[fd];
+    if (!file)
+        return -EBADF;
+
+    return file->read(buf, n);
+}
 
+int kernel::syscall::do_close(int fd)
+{
+    current_process->files.close(fd);
+    return 0;
+}
+
+int kernel::syscall::do_dup(int old_fd)
+{
+    return current_process->files.dup(old_fd);
+}
+
+int kernel::syscall::do_dup2(int old_fd, int new_fd)
+{
+    return current_process->files.dup2(old_fd, new_fd);
+}
+
+int kernel::syscall::do_pipe(int __user* pipefd)
+{
+    return current_process->files.pipe(pipefd);
+}
+
+ssize_t kernel::syscall::do_getdents(int fd, char __user* buf, size_t cnt)
+{
+    auto* dir = current_process->files[fd];
+    if (!dir)
+        return -EBADF;
+
+    return dir->getdents(buf, cnt);
+}
+
+ssize_t kernel::syscall::do_getdents64(int fd, char __user* buf, size_t cnt)
+{
+    auto* dir = current_process->files[fd];
+    if (!dir)
+        return -EBADF;
+
+    return dir->getdents64(buf, cnt);
+}
+
+int kernel::syscall::do_open(const char __user* path, int flags, mode_t mode)
+{
+    mode &= ~current_process->umask;
+
+    return current_process->files.open(*current_process,
+        current_process->pwd + path, flags, mode);
+}
+
+int kernel::syscall::do_symlink(const char __user* target, const char __user* linkpath)
+{
     // TODO: use copy_from_user
     auto path = current_process->pwd + linkpath;
     auto* dent = fs::vfs_open(*current_process->root, path);
@@ -28,12 +105,8 @@ long _syscall_symlink(interrupt_stack_normal* data)
     return dent->ind->fs->symlink(dent, linkname.c_str(), target);
 }
 
-long _syscall_readlink(interrupt_stack_normal* data)
+int kernel::syscall::do_readlink(const char __user* pathname, char __user* buf, size_t buf_size)
 {
-    SYSCALL_ARG1(const char __user*, pathname);
-    SYSCALL_ARG2(char __user*, buf);
-    SYSCALL_ARG3(size_t, buf_size);
-
     // TODO: use copy_from_user
     auto path = current_process->pwd + pathname;
     auto* dent = fs::vfs_open(*current_process->root, path, false);
@@ -47,3 +120,460 @@ long _syscall_readlink(interrupt_stack_normal* data)
     // TODO: use copy_to_user
     return dent->ind->fs->readlink(dent->ind, buf, buf_size);
 }
+
+int kernel::syscall::do_ioctl(int fd, unsigned long request, uintptr_t arg3)
+{
+    // TODO: check fd type and get tty* from fd
+    //
+    //       we use a trick for now, check whether
+    //       the file that fd points to is a pipe or
+    //       not. and we suppose that stdin will be
+    //       either a tty or a pipe.
+    auto* file = current_process->files[fd];
+    if (!file || !S_ISCHR(file->mode))
+        return -ENOTTY;
+
+    switch (request) {
+    case TIOCGPGRP: {
+        auto* pgid = (pid_t __user*)arg3;
+        auto* ctrl_tty = current_process->control_tty;
+
+        if (!ctrl_tty)
+            return -ENOTTY;
+
+        // TODO: copy_to_user
+        *pgid = ctrl_tty->get_pgrp();
+        break;
+    }
+    case TIOCSPGRP: {
+        // TODO: copy_from_user
+        auto pgid = *(const pid_t __user*)arg3;
+        auto* ctrl_tty = current_process->control_tty;
+
+        if (!ctrl_tty)
+            return -ENOTTY;
+
+        ctrl_tty->set_pgrp(pgid);
+        break;
+    }
+    case TIOCGWINSZ: {
+        auto* ws = (winsize __user*)arg3;
+        // TODO: copy_to_user
+        ws->ws_col = 80;
+        ws->ws_row = 10;
+        break;
+    }
+    case TCGETS: {
+        auto* argp = (struct termios __user*)arg3;
+
+        auto* ctrl_tty = current_process->control_tty;
+        if (!ctrl_tty)
+            return -EINVAL;
+
+        // TODO: use copy_to_user
+        memcpy(argp, &ctrl_tty->termio, sizeof(ctrl_tty->termio));
+
+        break;
+    }
+    case TCSETS: {
+        auto* argp = (const struct termios __user*)arg3;
+
+        auto* ctrl_tty = current_process->control_tty;
+        if (!ctrl_tty)
+            return -EINVAL;
+
+        // TODO: use copy_from_user
+        memcpy(&ctrl_tty->termio, argp, sizeof(ctrl_tty->termio));
+
+        break;
+    }
+    default:
+        kmsgf("[error] the ioctl() function %x is not implemented", request);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+ssize_t kernel::syscall::do_readv(int fd, const iovec __user* iov, int iovcnt)
+{
+    auto* file = current_process->files[fd];
+
+    if (!file)
+        return -EBADF;
+
+    // TODO: fix fake EOF
+    ssize_t totn = 0;
+    for (int i = 0; i < iovcnt; ++i) {
+        ssize_t ret = file->read(
+            (char*)iov[i].iov_base, iov[i].iov_len);
+
+        if (ret < 0)
+            return ret;
+
+        if (ret == 0)
+            break;
+
+        totn += ret;
+
+        if ((size_t)ret != iov[i].iov_len)
+            break;
+    }
+
+    return totn;
+}
+
+// TODO: this operation SHOULD be atomic
+ssize_t kernel::syscall::do_writev(int fd, const iovec __user* iov, int iovcnt)
+{
+    auto* file = current_process->files[fd];
+
+    if (!file)
+        return -EBADF;
+
+    ssize_t totn = 0;
+    for (int i = 0; i < iovcnt; ++i) {
+        ssize_t ret = file->write(
+            (const char*)iov[i].iov_base, iov[i].iov_len);
+
+        if (ret < 0)
+            return ret;
+        totn += ret;
+    }
+
+    return totn;
+}
+
+off_t kernel::syscall::do_lseek(int fd, off_t offset, int whence)
+{
+    auto* file = current_process->files[fd];
+    if (!file)
+        return -EBADF;
+
+    return file->seek(offset, whence);
+}
+
+uintptr_t kernel::syscall::do_mmap_pgoff(uintptr_t addr, size_t len,
+        int prot, int flags, int fd, off_t pgoffset)
+{
+    if (addr & 0xfff)
+        return -EINVAL;
+    if (len == 0)
+        return -EINVAL;
+
+    len = (len + 0xfff) & ~0xfff;
+
+    // TODO: shared mappings
+    if (flags & MAP_SHARED)
+        return -ENOMEM;
+
+    if (flags & MAP_ANONYMOUS) {
+        if (fd != -1)
+            return -EINVAL;
+        if (pgoffset != 0)
+            return -EINVAL;
+
+        // TODO: shared mappings
+        if (!(flags & MAP_PRIVATE))
+            return -EINVAL;
+
+        auto& mms = current_process->mms;
+
+        // do unmapping, equal to munmap, MAP_FIXED set
+        if (prot == PROT_NONE) {
+            if (int ret = mms.unmap(addr, len); ret != 0)
+                return ret;
+        }
+        else {
+            // TODO: add NULL check in mm_list
+            if (!addr || !mms.is_avail(addr, len)) {
+                if (flags & MAP_FIXED)
+                    return -ENOMEM;
+                addr = mms.find_avail(addr, len);
+            }
+
+            // TODO: check current cs
+            if (addr + len > 0x100000000ULL)
+                return -ENOMEM;
+
+            mem::mm_list::map_args args{};
+            args.vaddr = addr;
+            args.length = len;
+            args.flags = mem::MM_ANONYMOUS;
+
+            if (prot & PROT_WRITE)
+                args.flags |= mem::MM_WRITE;
+
+            if (prot & PROT_EXEC)
+                args.flags |= mem::MM_EXECUTE;
+
+            if (int ret = mms.mmap(args); ret != 0)
+                return ret;
+        }
+    }
+
+    return addr;
+}
+
+int kernel::syscall::do_munmap(uintptr_t addr, size_t len)
+{
+    if (addr & 0xfff)
+        return -EINVAL;
+
+    return current_process->mms.unmap(addr, len);
+}
+
+ssize_t kernel::syscall::do_sendfile(int out_fd, int in_fd,
+        off_t __user* offset, size_t count)
+{
+    auto* out_file = current_process->files[out_fd];
+    auto* in_file = current_process->files[in_fd];
+
+    if (!out_file || !in_file)
+        return -EBADF;
+
+    // TODO: check whether in_fd supports mmapping
+    if (!S_ISREG(in_file->mode) && !S_ISBLK(in_file->mode))
+        return -EINVAL;
+
+    if (offset) {
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+
+    constexpr size_t bufsize = 4096;
+    std::vector<char> buf(bufsize);
+    size_t totn = 0;
+    while (totn < count) {
+        if (current_thread->signals.pending_signal() != 0)
+            return (totn == 0) ? -EINTR : totn;
+
+        size_t n = std::min(count - totn, bufsize);
+        ssize_t ret = in_file->read(buf.data(), n);
+        if (ret < 0)
+            return ret;
+        if (ret == 0)
+            break;
+        ret = out_file->write(buf.data(), ret);
+        if (ret < 0)
+            return ret;
+        totn += ret;
+
+        // TODO: this won't work, since when we are in the syscall handler,
+        //       interrupts are blocked.
+        //       one solution is to put the sendfile action into a kernel
+        //       worker and pause the calling thread so that the worker
+        //       thread could be interrupted normally.
+    }
+
+    return totn;
+}
+
+int kernel::syscall::do_statx(int dirfd, const char __user* path,
+        int flags, unsigned int mask, statx __user* statxbuf)
+{
+    // AT_STATX_SYNC_AS_STAT is the default value
+    if ((flags & AT_STATX_SYNC_TYPE) != AT_STATX_SYNC_AS_STAT) {
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+
+    if (dirfd != AT_FDCWD) {
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+
+    auto* dent = fs::vfs_open(*current_process->root,
+            current_process->pwd + path,
+            !(flags & AT_SYMLINK_NOFOLLOW));
+
+    if (!dent)
+        return -ENOENT;
+
+    // TODO: copy to user
+    auto ret = fs::vfs_stat(dent, statxbuf, mask);
+
+    return ret;
+}
+
+int kernel::syscall::do_fcntl(int fd, int cmd, unsigned long arg)
+{
+    auto* file = current_process->files[fd];
+    if (!file)
+        return -EBADF;
+
+    switch (cmd) {
+    case F_SETFD:
+        return current_process->files.set_flags(fd, arg);
+    case F_DUPFD:
+    case F_DUPFD_CLOEXEC: {
+        return current_process->files.dupfd(fd, arg, FD_CLOEXEC);
+    }
+    default:
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+}
+
+int kernel::syscall::do_mkdir(const char __user* pathname, mode_t mode)
+{
+    mode &= (~current_process->umask & 0777);
+
+    auto path = current_process->pwd + pathname;
+
+    auto* dent = fs::vfs_open(*current_process->root, path);
+    if (dent)
+        return -EEXIST;
+
+    // get parent path
+    auto dirname = path.last_name();
+    path.remove_last();
+
+    dent = fs::vfs_open(*current_process->root, path);
+    if (!dent)
+        return -ENOENT;
+
+    if (!S_ISDIR(dent->ind->mode))
+        return -ENOTDIR;
+
+    auto ret = fs::vfs_mkdir(dent, dirname.c_str(), mode);
+
+    if (ret != 0)
+        return ret;
+
+    return 0;
+}
+
+int kernel::syscall::do_truncate(const char __user* pathname, long length)
+{
+    auto path = current_process->pwd + pathname;
+
+    auto* dent = fs::vfs_open(*current_process->root, path);
+    if (!dent)
+        return -ENOENT;
+
+    if (S_ISDIR(dent->ind->mode))
+        return -EISDIR;
+
+    auto ret = fs::vfs_truncate(dent->ind, length);
+
+    if (ret != 0)
+        return ret;
+
+    return 0;
+}
+
+int kernel::syscall::do_unlink(const char __user* pathname)
+{
+    auto path = current_process->pwd + pathname;
+    auto* dent = fs::vfs_open(*current_process->root, path, false);
+
+    if (!dent)
+        return -ENOENT;
+
+    if (S_ISDIR(dent->ind->mode))
+        return -EISDIR;
+
+    return fs::vfs_rmfile(dent->parent, dent->name.c_str());
+}
+
+int kernel::syscall::do_access(const char __user* pathname, int mode)
+{
+    auto path = current_process->pwd + pathname;
+    auto* dent = fs::vfs_open(*current_process->root, path);
+
+    if (!dent)
+        return -ENOENT;
+
+    switch (mode) {
+    case F_OK:
+        return 0;
+    case R_OK:
+    case W_OK:
+    case X_OK:
+        // TODO: check privilege
+        return 0;
+    default:
+        return -EINVAL;
+    }
+}
+
+int kernel::syscall::do_mknod(const char __user* pathname, mode_t mode, dev_t dev)
+{
+    auto path = current_process->pwd + pathname;
+    auto* dent = fs::vfs_open(*current_process->root, path);
+
+    if (dent)
+        return -EEXIST;
+
+    auto filename = path.last_name();
+    path.remove_last();
+
+    dent = fs::vfs_open(*current_process->root, path);
+    if (!dent)
+        return -ENOENT;
+
+    return fs::vfs_mknode(dent, filename.c_str(), mode, dev);
+}
+
+int kernel::syscall::do_poll(pollfd __user* fds, nfds_t nfds, int timeout)
+{
+    if (nfds == 0)
+        return 0;
+
+    if (nfds > 1) {
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+
+    // TODO: handle timeout
+    // if (timeout != -1) {
+    // }
+    (void)timeout;
+
+    // for now, we will poll from console only
+    int ret = tty::console->poll();
+    if (ret < 0)
+        return ret;
+
+    fds[0].revents = POLLIN;
+    return ret;
+
+    // TODO: check address validity
+    // TODO: poll multiple fds and other type of files
+    // for (nfds_t i = 0; i < nfds; ++i) {
+    //     auto& pfd = fds[i];
+
+    //     auto* file = current_process->files[pfd.fd];
+    //     if (!file || !S_ISCHR(file->mode))
+    //         return -EINVAL;
+
+    //     // poll the fds
+    // }
+    //
+    // return 0;
+}
+
+/* TODO: implement vfs_stat(stat*)
+int do_stat(const char __user* pathname, stat __user* buf)
+{
+    auto* dent = fs::vfs_open(*current_process->root,
+        types::make_path(pathname, current_process->pwd));
+
+    if (!dent)
+        return -ENOENT;
+
+    return fs::vfs_stat(dent, buf);
+}
+*/
+
+/* TODO: implement vfs_stat(stat*)
+int do_fstat(int fd, stat __user* buf)
+{
+    auto* file = current_process->files[fd];
+    if (!file)
+        return -EBADF;
+
+    return fs::vfs_stat(file, buf);
+}
+*/

+ 51 - 0
src/kernel/syscall/infoops.cc

@@ -0,0 +1,51 @@
+#include <bits/alltypes.h>
+#include <time.h>
+
+#include <kernel/hw/timer.hpp>
+#include <kernel/log.hpp>
+#include <kernel/process.hpp>
+#include <kernel/syscall.hpp>
+
+#define NOT_IMPLEMENTED not_implemented(__FILE__, __LINE__)
+
+static inline void not_implemented(const char* pos, int line)
+{
+    kmsgf("[kernel] the function at %s:%d is not implemented, killing the pid%d...",
+            pos, line, current_process->pid);
+    current_thread->send_signal(SIGSYS);
+}
+
+int kernel::syscall::do_clock_gettime(clockid_t clk_id, timespec __user* tp)
+{
+    if (clk_id != CLOCK_REALTIME && clk_id != CLOCK_MONOTONIC) {
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+
+    if (!tp)
+        return -EFAULT;
+
+    auto time = hw::timer::current_ticks();
+
+    // TODO: copy_to_user
+    tp->tv_sec = time / 100;
+    tp->tv_nsec = 10000000 * (time % 100);
+
+    return 0;
+}
+
+int kernel::syscall::do_gettimeofday(timeval __user* tv, void __user* tz)
+{
+    // TODO: return time of the day, not time from this boot
+    if (tz) [[unlikely]]
+        return -EINVAL;
+
+    if (tv) {
+        // TODO: use copy_to_user
+        auto ticks = kernel::hw::timer::current_ticks();
+        tv->tv_sec = ticks / 100;
+        tv->tv_usec = ticks * 10 * 1000;
+    }
+
+    return 0;
+}

+ 6 - 7
src/kernel/syscall/mount.cc

@@ -6,14 +6,13 @@
 #include <kernel/syscall.hpp>
 #include <kernel/vfs.hpp>
 
-long _syscall_mount(interrupt_stack_normal* data)
+int kernel::syscall::do_mount(
+        const char __user* source,
+        const char __user* target,
+        const char __user* fstype,
+        unsigned long flags,
+        const void __user* _fsdata)
 {
-    SYSCALL_ARG1(const char __user*, source);
-    SYSCALL_ARG2(const char __user*, target);
-    SYSCALL_ARG3(const char __user*, fstype);
-    SYSCALL_ARG4(unsigned long, flags);
-    SYSCALL_ARG5(const void __user*, _fsdata);
-
     if (!fstype)
         return -EINVAL;
 

+ 393 - 0
src/kernel/syscall/procops.cc

@@ -0,0 +1,393 @@
+#include <sys/prctl.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+
+#include <types/elf.hpp>
+
+#include <kernel/log.hpp>
+#include <kernel/process.hpp>
+#include <kernel/signal.hpp>
+#include <kernel/syscall.hpp>
+#include <kernel/utsname.hpp>
+
+using namespace kernel::syscall;
+
+#define NOT_IMPLEMENTED not_implemented(__FILE__, __LINE__)
+
+static inline void not_implemented(const char* pos, int line)
+{
+    kmsgf("[kernel] the function at %s:%d is not implemented, killing the pid%d...",
+            pos, line, current_process->pid);
+    current_thread->send_signal(SIGSYS);
+}
+
+int kernel::syscall::do_chdir(const char __user* path)
+{
+    auto* dir = fs::vfs_open(*current_process->root,
+            current_process->pwd + path);
+    if (!dir)
+        return -ENOENT;
+
+    if (!S_ISDIR(dir->ind->mode))
+        return -ENOTDIR;
+
+    current_process->pwd.clear();
+    dir->path(*current_process->root, current_process->pwd);
+
+    return 0;
+}
+
+execve_retval kernel::syscall::do_execve(
+        const char __user* exec,
+        char __user* const __user* argv,
+        char __user* const __user* envp)
+{
+    types::elf::elf32_load_data d;
+
+    if (!exec || !argv || !envp)
+        return { 0, 0, -EFAULT };
+
+    // TODO: use copy_from_user
+    while (*argv)
+        d.argv.push_back(*(argv++));
+
+    while (*envp)
+        d.envp.push_back(*(envp++));
+
+    d.exec_dent = fs::vfs_open(*current_process->root,
+            current_process->pwd + exec);
+
+    if (!d.exec_dent)
+        return { 0, 0, -ENOENT };
+
+    current_process->files.onexec();
+
+    // TODO: set cs and ss to compatibility mode
+    if (int ret = types::elf::elf32_load(d); ret != 0)
+        return { 0, 0, ret };
+
+    current_thread->signals.on_exec();
+
+    return { d.ip, d.sp, 0 };
+}
+
+
+int kernel::syscall::do_exit(int status)
+{
+    // TODO: terminating a thread only
+    assert(current_process->thds.size() == 1);
+
+    // terminating a whole process:
+    procs->kill(current_process->pid, (status & 0xff) << 8);
+
+    // switch to new process and continue
+    schedule_noreturn();
+}
+
+int kernel::syscall::do_waitpid(pid_t waitpid, int __user* arg1, int options)
+{
+    if (waitpid != -1)
+        return -EINVAL;
+
+    auto& cv = current_process->waitlist;
+    kernel::async::lock_guard lck(current_process->mtx_waitprocs);
+
+    auto& waitlist = current_process->waitprocs;
+
+    // TODO: check if it is waiting for stopped process
+    if (options & ~(WNOHANG | WUNTRACED)) {
+        NOT_IMPLEMENTED;
+        return -EINVAL;
+    }
+
+    while (waitlist.empty()) {
+        if (current_process->children.empty())
+            return -ECHILD;
+
+        if (options & WNOHANG)
+            return 0;
+
+        bool interrupted = cv.wait(current_process->mtx_waitprocs);
+        if (interrupted)
+            return -EINTR;
+    }
+
+    for (auto iter = waitlist.begin(); iter != waitlist.end(); ++iter) {
+        if (WIFSTOPPED(iter->code) && !(options & WUNTRACED))
+            continue;
+
+        pid_t pid = iter->pid;
+
+        // TODO: copy_to_user
+        *arg1 = iter->code;
+
+        procs->remove(pid);
+        waitlist.erase(iter);
+
+        return pid;
+    }
+
+    // we should never reach here
+    freeze();
+    return -EINVAL;
+}
+
+char __user* kernel::syscall::do_getcwd(char __user* buf, size_t buf_size)
+{
+    // TODO: use copy_to_user
+    auto path = current_process->pwd.full_path();
+    strncpy(buf, path.c_str(), buf_size);
+    buf[buf_size - 1] = 0;
+
+    return buf;
+}
+
+pid_t kernel::syscall::do_setsid()
+{
+    if (current_process->pid == current_process->pgid)
+        return -EPERM;
+
+    current_process->sid = current_process->pid;
+    current_process->pgid = current_process->pid;
+
+    // TODO: get tty* from fd or block device id
+    tty::console->set_pgrp(current_process->pid);
+    current_process->control_tty = tty::console;
+
+    return current_process->pid;
+}
+
+pid_t kernel::syscall::do_getsid(pid_t pid)
+{
+    auto [ pproc, found ] = procs->try_find(pid);
+    if (!found)
+        return -ESRCH;
+    if (pproc->sid != current_process->sid)
+        return -EPERM;
+
+    return pproc->sid;
+}
+
+int kernel::syscall::do_setpgid(pid_t pid, pid_t pgid)
+{
+    if (pgid < 0)
+        return -EINVAL;
+
+    if (pid == 0)
+        pid = current_process->pid;
+
+    if (pgid == 0)
+        pgid = pid;
+
+    auto [ pproc, found ] = procs->try_find(pid);
+    if (!found)
+        return -ESRCH;
+
+    // TODO: check whether pgid and the original
+    //       pgid is in the same session
+
+    pproc->pgid = pgid;
+
+    return 0;
+}
+
+int kernel::syscall::do_set_thread_area(kernel::user::user_desc __user* ptr)
+{
+    auto ret = current_thread->set_thread_area(ptr);
+    if (ret != 0)
+        return ret;
+
+    current_thread->load_thread_area32();
+    return 0;
+}
+
+pid_t kernel::syscall::do_set_tid_address(int __user* tidptr)
+{
+    // TODO: copy_from_user
+    current_thread->set_child_tid = tidptr;
+    return current_thread->tid();
+}
+
+int kernel::syscall::do_prctl(int option, uintptr_t arg2)
+{
+    switch (option) {
+    case PR_SET_NAME: {
+        // TODO: copy_from_user
+        auto* name = (const char __user*)arg2;
+        current_thread->name.assign(name, 15);
+        break;
+    }
+    case PR_GET_NAME: {
+        auto* name = (char __user*)arg2;
+        // TODO: copy_to_user
+        strncpy(name, current_thread->name.c_str(), 16);
+        name[15] = 0;
+        break;
+    }
+    default:
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int kernel::syscall::do_arch_prctl(int option, uintptr_t arg2)
+{
+    switch (option) {
+    case PR_SET_NAME: {
+        // TODO: copy_from_user
+        auto* name = (const char __user*)arg2;
+        current_thread->name.assign(name, 15);
+        break;
+    }
+    case PR_GET_NAME: {
+        auto* name = (char __user*)arg2;
+        // TODO: copy_to_user
+        strncpy(name, current_thread->name.c_str(), 16);
+        name[15] = 0;
+        break;
+    }
+    default:
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int kernel::syscall::do_umask(mode_t mask)
+{
+    mode_t old = current_process->umask;
+    current_process->umask = mask;
+
+    return old;
+}
+
+int kernel::syscall::do_kill(pid_t pid, int sig)
+{
+    auto [ pproc, found ] = procs->try_find(pid);
+    if (!found)
+        return -ESRCH;
+
+    if (!kernel::signal_list::check_valid(sig))
+        return -EINVAL;
+
+    if (pproc->is_system())
+        return 0;
+
+    // TODO: check permission
+    procs->send_signal(pid, sig);
+
+    return 0;
+}
+
+int kernel::syscall::do_rt_sigprocmask(int how, const sigmask_type __user* set,
+        sigmask_type __user* oldset, size_t sigsetsize)
+{
+    if (sigsetsize != sizeof(sigmask_type))
+        return -EINVAL;
+
+    sigmask_type sigs = current_thread->signals.get_mask();
+
+    // TODO: use copy_to_user
+    if (oldset)
+        memcpy(oldset, &sigs, sizeof(sigmask_type));
+
+    if (!set)
+        return 0;
+
+    // TODO: use copy_from_user
+    switch (how) {
+    case SIG_BLOCK:
+        current_thread->signals.mask(*set);
+        break;
+    case SIG_UNBLOCK:
+        current_thread->signals.unmask(*set);
+        break;
+    case SIG_SETMASK:
+        current_thread->signals.set_mask(*set);
+        break;
+    }
+
+    return 0;
+}
+
+int kernel::syscall::do_rt_sigaction(int signum, const sigaction __user* act,
+        sigaction __user* oldact, size_t sigsetsize)
+{
+    if (sigsetsize != sizeof(sigmask_type))
+        return -EINVAL;
+
+    if (!kernel::signal_list::check_valid(signum)
+        || signum == SIGKILL || signum == SIGSTOP)
+        return -EINVAL;
+
+    // TODO: use copy_to_user
+    if (oldact)
+        current_thread->signals.get_handler(signum, *oldact);
+
+    if (!act)
+        return 0;
+
+    // TODO: use copy_from_user
+    current_thread->signals.set_handler(signum, *act);
+
+    return 0;
+}
+
+int kernel::syscall::do_newuname(new_utsname __user* buf)
+{
+    if (!buf)
+        return -EFAULT;
+
+    // TODO: use copy_to_user
+    memcpy(buf, sys_utsname, sizeof(new_utsname));
+
+    return 0;
+}
+
+pid_t kernel::syscall::do_getpgid(pid_t pid)
+{
+    if (pid == 0)
+        return current_process->pgid;
+
+    auto [ pproc, found ] = procs->try_find(pid);
+    if (!found)
+        return -ESRCH;
+
+    return pproc->pgid;
+}
+
+pid_t kernel::syscall::do_getpid()
+{
+    return current_process->pid;
+}
+
+pid_t kernel::syscall::do_getppid()
+{
+    return current_process->ppid;
+}
+
+uid_t kernel::syscall::do_getuid()
+{
+    return 0; // all users are root for now
+}
+
+uid_t kernel::syscall::do_geteuid()
+{
+    return 0; // all users are root for now
+}
+
+gid_t kernel::syscall::do_getgid()
+{
+    return 0; // all users are root for now
+}
+
+pid_t kernel::syscall::do_gettid()
+{
+    return current_thread->tid();
+}
+
+uintptr_t kernel::syscall::do_brk(uintptr_t addr)
+{
+    return current_process->mms.set_brk(addr);
+}

+ 9 - 11
src/kernel/task/thread.cc

@@ -164,27 +164,25 @@ int thread::set_thread_area(kernel::user::user_desc* ptr)
     }
 
     if (ptr->entry_number == -1U)
-        ptr->entry_number = 6;
+        ptr->entry_number = 7;
     else
         return -1;
 
     if (!ptr->seg_32bit)
         return -1;
 
-    tls_desc[0]  = ptr->limit & 0x0000'ffff;
-    tls_desc[0] |= (ptr->base_addr & 0x00ff'ffffULL) << 16;
-    tls_desc[0] |= 0xe2'00'0000'0000;
-    tls_desc[0] |= (ptr->limit & 0x000f'0000ULL) << (48-16);
-    tls_desc[0] |= ((ptr->limit_in_pages + 0ULL) << 55);
-    tls_desc[0] |= (ptr->base_addr & 0xf000'0000) << (56-28);
-
-    tls_desc[1]  = 0; // 63:32: all 0, 31:0: ptr->base_addr[63:32]
+    tls_desc32  = ptr->limit & 0x0'ffff;
+    tls_desc32 |= (ptr->base_addr & 0x00'ffffffULL) << 16;
+    tls_desc32 |= 0x4'0'f2'000000'0000;
+    tls_desc32 |= (ptr->limit & 0xf'0000ULL) << (48-16);
+    tls_desc32 |= ((ptr->limit_in_pages + 0ULL) << 55);
+    tls_desc32 |= (ptr->base_addr & 0xff'000000ULL) << (56-24);
 
     return 0;
 }
 
-int thread::load_thread_area() const
+int thread::load_thread_area32() const
 {
-    kernel::user::load_thread_area(tls_desc[0], tls_desc[1]);
+    kernel::user::load_thread_area32(tls_desc32);
     return 0;
 }

+ 1 - 19
src/kernel/user/thread_local.cc

@@ -10,29 +10,11 @@ using namespace kernel::user;
 
 void kernel::user::load_thread_area32(uint64_t desc)
 {
-    mem::gdt[7] = desc;
-    asm volatile(
-        "mov %%gs, %%ax\n\t"
-        "mov %%ax, %%gs\n\t"
-        : : : "ax"
-    );
-}
-
-void kernel::user::load_thread_area64(uint64_t desc_lo, uint64_t desc_hi)
-{
-    mem::gdt[12] = desc_lo;
-    mem::gdt[13] = desc_hi;
+    kernel::mem::gdt[7] = desc;
 
     asm volatile(
-        "mov %%fs, %%ax\n\t"
-        "mov %%ax, %%fs\n\t"
         "mov %%gs, %%ax\n\t"
         "mov %%ax, %%gs\n\t"
         : : : "ax"
     );
 }
-
-void kernel::user::load_thread_area(uint64_t desc_lo, uint64_t desc_hi)
-{
-    load_thread_area64(desc_lo, desc_hi);
-}

+ 6 - 5
src/kinit.cpp

@@ -84,7 +84,7 @@ void NORETURN real_kernel_init(mem::paging::pfn_t kernel_stack_pfn)
 
     // TODO: remove this
     init_vfs();
-    // init_syscall();
+    init_syscall_table();
 
     init_scheduler(kernel_stack_pfn);
 }
@@ -187,7 +187,7 @@ void setup_gdt()
     mem::gdt[5]  = 0x00cf'fa00'0000'ffff;
     // user data32
     mem::gdt[6]  = 0x00cf'f200'0000'ffff;
-    // reserved
+    // thread load 32bit
     mem::gdt[7]  = 0x0000'0000'0000'0000;
 
     // TSS descriptor
@@ -195,11 +195,12 @@ void setup_gdt()
     mem::gdt[9]  = 0x0000'0000'ffff'ff00;
 
     // LDT descriptor
-    mem::gdt[10] = 0x0000'8200'0060'000f;
+    mem::gdt[10] = 0x0000'8200'0060'001f;
     mem::gdt[11] = 0x0000'0000'ffff'ff00;
 
-    // thread local
+    // null segment
     mem::gdt[12] = 0x0000'0000'0000'0000;
+    // thread local 64bit
     mem::gdt[13] = 0x0000'0000'0000'0000;
 
     uint64_t descriptor[] = {
@@ -212,7 +213,7 @@ void setup_gdt()
             "lldt %%ax\n\t"
             "mov $0x40, %%ax\n\t"
             "ltr %%ax\n\t"
-            : : "r"((uintptr_t)descriptor+6): "ax"
+            : : "r"((uintptr_t)descriptor+6): "ax", "memory"
     );
 }
 

+ 2 - 2
src/types/elf.cpp

@@ -183,8 +183,8 @@ int types::elf::elf32_load(types::elf::elf32_load_data& d)
     __user_push32(sp, 0);
 
     // push argv
-    for (auto ent : args)
-        __user_push32(sp, ent);
+    for (int i = args.size()-1; i >= 0; --i)
+        __user_push32(sp, args[i]);
 
     // push argc
     __user_push32(sp, args.size());

Some files were not shown because too many files changed in this diff