/* Calcite, src/kernel/process.c
 * Copyright 2025 Benji Dial
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <https://www.gnu.org/licenses/>. */

#include "framebuffer.h"
#include "ipc-dgram.h"
#include "scheduler.h"
#include "syscalls.h"
#include "process.h"
#include "utility.h"
#include "paging.h"
#include "panic.h"
#include "heap.h"
#include "fs.h"

#include <stddef.h>
#include <stdint.h>

struct process_file_info *get_file_info(
  struct process *process, file_handle_t handle) {

  if (process->files == 0 ||
      handle >= (uint64_t)process->files_buffer_size ||
      process->files[handle].fs == 0)
    return 0;
  return &process->files[handle];
}

void create_process(struct process *process_out) {
  process_out->p4_physical_base = take_free_physical_page();
  process_out->p4_virtual_base = find_free_kernel_region(4096);
  map_in_kernel_page_table(
    process_out->p4_physical_base, process_out->p4_virtual_base, 1, 0);

  process_out->p3_physical_base = take_free_physical_page();
  process_out->p3_virtual_base = find_free_kernel_region(4096);
  map_in_kernel_page_table(
    process_out->p3_physical_base, process_out->p3_virtual_base, 1, 0);

  process_out->p4_virtual_base[0] = process_out->p3_physical_base | 0x7;
  process_out->p4_virtual_base[511] = kernel_p3_physical_address | 0x3;
  for (int i = 1; i < 511; ++i)
    process_out->p4_virtual_base[i] = 0;

  for (int i = 0; i < 512; ++i) {
    process_out->p3_virtual_base[i] = 0;
    process_out->p2_virtual_bases[i] = 0;
    process_out->p1_virtual_bases[i] = 0;
    process_out->owned_pages_bitmaps[i] = 0;
  }

  process_out->n_threads = 0;
  process_out->files = 0;
  process_out->ipc_dgram_handles = 0;
}

void map_page_for_process(
  struct process *process, uint64_t physical_base, void *virtual_base,
  int writable, int executable, int owned) {

  assert(physical_base % 4096 == 0)
  uint64_t vma = (uint64_t)virtual_base;
  assert(vma % 4096 == 0)
  assert(vma != 0)
  assert(vma < 0x0000008000000000)

  int p1i = (vma >> 12) & 0x1ff;
  int p2i = (vma >> 21) & 0x1ff;
  int p3i = (vma >> 30) & 0x1ff;

  if (process->p3_virtual_base[p3i] == 0) {
    uint64_t p2_pma = take_free_physical_page();
    uint64_t *p2_vma = find_free_kernel_region(4096);
    map_in_kernel_page_table(p2_pma, p2_vma, 1, 0);

    process->p3_virtual_base[p3i] = p2_pma | 0x7;
    process->p2_virtual_bases[p3i] = p2_vma;
    process->p1_virtual_bases[p3i] = heap_alloc(4096);
    process->owned_pages_bitmaps[p3i] = heap_alloc(4096);

    for (int i = 0; i < 512; ++i) {
      p2_vma[i] = 0;
      process->p1_virtual_bases[p3i][i] = 0;
      process->owned_pages_bitmaps[p3i][i] = 0;
    }
  }

  if (process->p2_virtual_bases[p3i][p2i] == 0) {
    uint64_t p1_pma = take_free_physical_page();
    uint64_t *p1_vma = find_free_kernel_region(4096);
    map_in_kernel_page_table(p1_pma, p1_vma, 1, 0);

    process->p2_virtual_bases[p3i][p2i] = p1_pma | 0x7;
    process->p1_virtual_bases[p3i][p2i] = p1_vma;
    process->owned_pages_bitmaps[p3i][p2i] = heap_alloc(64);

    for (int i = 0; i < 512; ++i)
      p1_vma[i] = 0;
    for (int i = 0; i < 64; ++i)
      process->owned_pages_bitmaps[p3i][p2i][i] = 0;
  }

  assert(process->p1_virtual_bases[p3i][p2i][p1i] == 0)

  //bit 0 = present, bit 1 = writable, bit 2 = user, bit 63 = no-execute
  process->p1_virtual_bases[p3i][p2i][p1i] = physical_base | 0x5 |
    (writable ? 0x2 : 0x0) | (executable ? 0 : 0x8000000000000000);

  if (owned)
    process->owned_pages_bitmaps[p3i][p2i][p1i / 8] |= 1 << (p1i % 8);
}
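
//undoes a single mapping made by map_page_for_process.  if the page was
//marked as owned by the process, its physical frame is returned to the
//free pool.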
void unmap_page_for_process(
  struct process *process, void *virtual_base) {

  uint64_t vma = (uint64_t)virtual_base;
  assert(vma % 4096 == 0)
  assert(vma != 0)
  assert(vma < 0x0000008000000000)

  int p1i = (vma >> 12) & 0x1ff;
  int p2i = (vma >> 21) & 0x1ff;
  int p3i = (vma >> 30) & 0x1ff;

  assert(
    process->p1_virtual_bases[p3i] &&
    process->p1_virtual_bases[p3i][p2i] &&
    process->p1_virtual_bases[p3i][p2i][p1i])

  if (process->owned_pages_bitmaps[p3i][p2i][p1i / 8] & (1 << (p1i % 8))) {
    process->owned_pages_bitmaps[p3i][p2i][p1i / 8] &= ~(1 << (p1i % 8));
    uint64_t pma =
      process->p1_virtual_bases[p3i][p2i][p1i] & 0x7ffffffffffff000;
    mark_physical_memory_free(pma, 4096);
  }

  process->p1_virtual_bases[p3i][p2i][p1i] = 0;
}

static int is_mapped_with_flags(
  struct process *process, const void *start, uint64_t length,
  uint64_t flags) {

  uint64_t vma_start = (uint64_t)start;
  uint64_t vma_end = vma_start + length;
  vma_start = (vma_start / 4096) * 4096;
  vma_end = ((vma_end - 1) / 4096 + 1) * 4096;

  for (uint64_t vma = vma_start; vma < vma_end; vma += 4096) {
    if (vma == 0 || vma >= 0x0000008000000000)
      return 0;

    int p1i = (vma >> 12) & 0x1ff;
    int p2i = (vma >> 21) & 0x1ff;
    int p3i = (vma >> 30) & 0x1ff;

    if (!process->p1_virtual_bases[p3i] ||
        !process->p1_virtual_bases[p3i][p2i] ||
        !process->p1_virtual_bases[p3i][p2i][p1i] ||
        (process->p1_virtual_bases[p3i][p2i][p1i] & flags) != flags)
      return 0;
  }

  return 1;
}

int is_mapped_writable(
  struct process *process, const void *start, uint64_t length) {
  return is_mapped_with_flags(process, start, length, 0x7);
}

int is_mapped_readable(
  struct process *process, const void *start, uint64_t length) {
  return is_mapped_with_flags(process, start, length, 0x5);
}

int is_mapped_readable_string(struct process *process, const char *start) {
  if (!is_mapped_readable(process, start, 1))
    return 0;
  while (1) {
    if ((uint64_t)start % 4096 == 0)
      if (!is_mapped_readable(process, start, 1))
        return 0;
    if (*start == 0)
      return 1;
    ++start;
  }
}

void *find_free_process_region(
  struct process *process, uint64_t page_count) {

  uint64_t start = 512 * 512 * 4096;
  uint64_t length = 0;

  while (1) {
    if (length >= page_count * 4096)
      break;

    uint64_t vma = start + length;
    if (vma >= 0x0000008000000000)
      goto no_mem;

    int p1i = (vma >> 12) & 0x1ff;
    int p2i = (vma >> 21) & 0x1ff;
    int p3i = (vma >> 30) & 0x1ff;

    if (p2i == 0 && p1i == 0 && process->p2_virtual_bases[p3i] == 0) {
      length += 512 * 512 * 4096;
      continue;
    }

    if (p1i == 0 && process->p1_virtual_bases[p3i][p2i] == 0) {
      length += 512 * 4096;
      continue;
    }

    if (process->p1_virtual_bases[p3i][p2i][p1i] == 0) {
      length += 4096;
      continue;
    }

    start = start + length + 4096;
    length = 0;
  }

  return (void *)start;

no_mem:
  panic("process out of virtual memory")
}
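
//loads an ELF64 executable into the process's address space.  only the
//fields needed here are read: e_entry, e_phoff, e_phentsize, e_phnum, and
//for each PT_LOAD segment its flags, file offset, virtual address, file
//size and memory size.  returns 1 on success, 0 on failure.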
int load_elf(
  struct process *process, uint64_t *entry_out,
  const struct fs_info *fs_info, void *fs_node) {

  struct fs_stat stat;
  if ((*fs_info->stat_file)(fs_info, fs_node, &stat) != FAR_SUCCESS)
    return 0;

  if (stat.bytes < 58)
    return 0;

  uint8_t head_part[34];
  if ((*fs_info->read_file)(fs_info, fs_node, head_part, 24, 34)
      != FAR_SUCCESS)
    return 0;

  *entry_out = *(uint64_t *)head_part;
  uint64_t phead_start = *(uint64_t *)(head_part + 8);
  uint16_t phead_entry_size = *(uint16_t *)(head_part + 30);
  uint16_t phead_entry_count = *(uint16_t *)(head_part + 32);

  if (stat.bytes < phead_start + phead_entry_count * phead_entry_size)
    panic("malformed elf")

  for (uint16_t i = 0; i < phead_entry_count; ++i) {
    uint64_t entry[6];
    if ((*fs_info->read_file)(
          fs_info, fs_node, entry, phead_start + i * phead_entry_size,
          6 * sizeof(uint64_t)) != FAR_SUCCESS)
      return 0;

    if ((entry[0] & 0xffffffff) != 1)
      continue;

    int executable = (entry[0] >> 32) & 0x1;
    int writable = (entry[0] >> 33) & 0x1;
    uint64_t file_start = entry[1];
    void *virtual_start = (void *)entry[2];
    uint64_t file_length = entry[4];
    uint64_t virtual_length = ((entry[5] - 1) / 4096 + 1) * 4096;

    if (stat.bytes < file_start + file_length)
      return 0;
    if (file_length > virtual_length)
      return 0;
    if ((uint64_t)virtual_start % 4096 != 0 || virtual_length % 4096 != 0)
      return 0;

    for (uint64_t i = 0; i < virtual_length; i += 4096) {
      uint64_t pma = take_free_physical_page();
      map_page_for_process(
        process, pma, virtual_start + i, writable, executable, 1);

      void *kvma = find_free_kernel_region(4096);
      map_in_kernel_page_table(pma, kvma, 1, 0);

      if (i + 4096 <= file_length) {
        if ((*fs_info->read_file)(
              fs_info, fs_node, kvma, file_start + i, 4096) != FAR_SUCCESS)
          return 0;
      }
      else if (i >= file_length)
        memzero(kvma, 4096);
      else {
        if ((*fs_info->read_file)(
              fs_info, fs_node, kvma, file_start + i, file_length & 0xfff)
            != FAR_SUCCESS)
          return 0;
        memzero(kvma + (file_length & 0xfff), 4096 - (file_length & 0xfff));
      }

      unmap_kernel_page(kvma);
    }
  }

  return 1;
}

//defined in process.asm.  enters user mode with:
//  cr3 = value of rbp when we jump here
//  rsp = value of rsp when we jump here
//  rip = rcx = value of r12 when we jump here
//  rflags = r11 = 0x200 (IF)
//  rdi = value of r13 when we jump here
//  all other registers zeroed
extern uint8_t thread_start;

int start_elf(const char *uri) {
  struct process *process = heap_alloc(sizeof(struct process));
  create_process(process);

  const struct fs_info *fs_info;
  void *fs_node;
  uint64_t entry;
  if (look_up_file(uri, &fs_info, &fs_node) != FAR_SUCCESS ||
      !load_elf(process, &entry, fs_info, fs_node)) {
    destroy_process(process);
    return 0;
  }

  if ((*fs_info->free_node)(fs_info, fs_node) != FAR_SUCCESS)
    panic("TODO")

  struct thread *thread = heap_alloc(sizeof(struct thread));
  create_thread(process, thread);

  struct continuation_info ci;
  ci.rip = (uint64_t)&thread_start;
  ci.rbp = thread->process->p4_physical_base;
  ci.rsp = (uint64_t)thread->stack_top;
  ci.r12 = entry;
  ci.r13 = 0;
  ci.running_thread = thread;
  add_to_queue(&ready_continuations, &ci);

  return 1;
}

void syscall_create_thread(void (*f)(uint64_t), uint64_t x) {
  assert(running_thread != 0)

  struct thread *thread = heap_alloc(sizeof(struct thread));
  create_thread(running_thread->process, thread);

  struct continuation_info ci;
  ci.rip = (uint64_t)&thread_start;
  ci.rbp = thread->process->p4_physical_base;
  ci.rsp = (uint64_t)thread->stack_top;
  ci.r12 = (uint64_t)f;
  ci.r13 = x;
  ci.running_thread = thread;
  add_to_queue(&ready_continuations, &ci);
}

int syscall_start_elf(const char *uri) {
  assert(running_thread != 0)
  if (!is_mapped_readable_string(running_thread->process, uri))
    syscall_illegal_args();
  return start_elf(uri);
}
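
//releases everything a process owns: open file handles, datagram handles
//(dropping the receiver claim on any box this process was receiving on),
//all owned physical pages, the paging structures, and the process struct
//itself.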
void destroy_process(struct process *process) {
  if (process->files) {
    for (int i = 0; i < process->files_buffer_size; ++i)
      if (process->files[i].fs != 0)
        (*process->files[i].fs->free_node)(
          process->files[i].fs, process->files[i].node);
    heap_dealloc(
      process->files,
      process->files_buffer_size * sizeof(struct process_file_info));
  }

  if (process->ipc_dgram_handles) {
    for (int i = 0; i < process->ipc_dgram_handles_buffer_size; ++i)
      if (process->ipc_dgram_handles[i].box != 0 &&
          process->ipc_dgram_handles[i].is_receiver == 1)
        process->ipc_dgram_handles[i].box->is_receiver_held = 0;
    heap_dealloc(
      process->ipc_dgram_handles,
      process->ipc_dgram_handles_buffer_size *
        sizeof(struct process_ipc_dgram_handle_info));
  }

  for (int p3i = 0; p3i < 512; ++p3i)
    if (process->p3_virtual_base[p3i]) {
      for (int p2i = 0; p2i < 512; ++p2i)
        if (process->p2_virtual_bases[p3i][p2i]) {
          for (int p1i = 0; p1i < 512; ++p1i) {
            uint64_t pma =
              process->p1_virtual_bases[p3i][p2i][p1i] & 0x7ffffffffffff000;
            if (process->owned_pages_bitmaps[p3i][p2i][p1i / 8] &
                (1 << (p1i % 8)))
              mark_physical_memory_free(pma, 4096);
          }
          unmap_and_free_kernel_page(process->p1_virtual_bases[p3i][p2i]);
          heap_dealloc(process->owned_pages_bitmaps[p3i][p2i], 64);
        }
      unmap_and_free_kernel_page(process->p2_virtual_bases[p3i]);
      heap_dealloc(process->p1_virtual_bases[p3i], 4096);
      heap_dealloc(process->owned_pages_bitmaps[p3i], 4096);
    }

  unmap_and_free_kernel_page(process->p3_virtual_base);
  unmap_and_free_kernel_page(process->p4_virtual_base);
  heap_dealloc(process, sizeof(struct process));
}

void destroy_thread(struct thread *thread) {
  assert(thread->process->n_threads >= 1)

  if (thread->process->n_threads == 1)
    destroy_process(thread->process);
  else {
    --thread->process->n_threads;
    for (void *p = thread->stack_bottom; p < thread->stack_top; p += 4096)
      unmap_page_for_process(thread->process, p);
  }

  heap_dealloc(thread, sizeof(struct thread));
}

#define INITIAL_STACK_SIZE (16 << 20)

void create_thread(struct process *process, struct thread *thread_out) {
  //TODO: allocate stack as needed on page faults, have guard pages, etc.
  void *stack_bottom_vma =
    find_free_process_region(process, INITIAL_STACK_SIZE / 4096);

  for (int i = 0; i < INITIAL_STACK_SIZE / 4096; ++i) {
    uint64_t pma = take_free_physical_page();
    map_page_for_process(
      process, pma, stack_bottom_vma + i * 4096, 1, 0, 1);

    void *kvma = find_free_kernel_region(4096);
    map_in_kernel_page_table(pma, kvma, 1, 0);
    memzero(kvma, 4096);
    unmap_kernel_page(kvma);
  }

  thread_out->process = process;
  thread_out->stack_bottom = stack_bottom_vma;
  thread_out->stack_top = stack_bottom_vma + INITIAL_STACK_SIZE;
  ++process->n_threads;
}

[[noreturn]] void syscall_illegal_args() {
  panic("bad syscall")
}

struct thread *running_thread = 0;

[[noreturn]] static void syscall_end_thread_with_temporary_stack() {
  destroy_syscall_stack(most_recent_syscall_stack);
  resume_next_continuation();
}

[[noreturn]] void syscall_end_thread() {
  assert(running_thread != 0)
  destroy_thread(running_thread);
  running_thread = 0;
  with_temporary_stack(&syscall_end_thread_with_temporary_stack);
}

void syscall_map_framebuffer(struct framebuffer_info *info_out) {
  assert(running_thread != 0)
  if (!is_mapped_writable(
        running_thread->process, info_out, sizeof(struct framebuffer_info)))
    syscall_illegal_args();

  uint64_t pages_needed = (fb_pitch * fb_height - 1) / 4096 + 1;
  void *base =
    find_free_process_region(running_thread->process, pages_needed);

  for (uint64_t i = 0; i < pages_needed; ++i)
    map_page_for_process(
      running_thread->process, fb_physical_base + i * 4096,
      base + i * 4096, 1, 0, 0);

  info_out->fb_base = base;
  info_out->fb_pitch = fb_pitch;
  info_out->fb_height = fb_height;
  info_out->fb_width = fb_width;
}
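
//file handles are indices into a per-process table that starts with
//INITIAL_FILE_HANDLE_COUNT slots and doubles whenever it fills up.  a slot
//with fs == 0 is free.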
#define INITIAL_FILE_HANDLE_COUNT 128

enum fs_access_result syscall_open_file(
  const char *path, file_handle_t *handle_out) {

  assert(running_thread != 0)
  if (!is_mapped_readable_string(running_thread->process, path) ||
      !is_mapped_writable(
        running_thread->process, handle_out, sizeof(file_handle_t)))
    syscall_illegal_args();

  const struct fs_info *fs;
  void *node;
  enum fs_access_result result = look_up_file(path, &fs, &node);
  if (result != FAR_SUCCESS)
    return result;

  struct fs_stat stat;
  result = (*fs->stat_file)(fs, node, &stat);
  if (result != FAR_SUCCESS) {
    (*fs->free_node)(fs, node);
    return result;
  }

  if (running_thread->process->files == 0) {
    running_thread->process->files = heap_alloc(
      INITIAL_FILE_HANDLE_COUNT * sizeof(struct process_file_info));
    running_thread->process->files_buffer_size = INITIAL_FILE_HANDLE_COUNT;
    for (int i = 0; i < INITIAL_FILE_HANDLE_COUNT; ++i)
      running_thread->process->files[i].fs = 0;
  }

  for (int i = 0; i < running_thread->process->files_buffer_size; ++i)
    if (running_thread->process->files[i].fs == 0) {
      running_thread->process->files[i].fs = fs;
      running_thread->process->files[i].node = node;
      memcpy(
        &running_thread->process->files[i].stat, &stat,
        sizeof(struct fs_stat));
      *handle_out = i;
      return FAR_SUCCESS;
    }

  //no free slot: double the handle table.
  struct process_file_info *old_buffer = running_thread->process->files;
  int old_size = running_thread->process->files_buffer_size;

  struct process_file_info *new_buffer =
    heap_alloc(2 * old_size * sizeof(struct process_file_info));
  memcpy(new_buffer, old_buffer, old_size * sizeof(struct process_file_info));
  heap_dealloc(old_buffer, old_size * sizeof(struct process_file_info));

  new_buffer[old_size].fs = fs;
  new_buffer[old_size].node = node;
  memcpy(&new_buffer[old_size].stat, &stat, sizeof(struct fs_stat));

  for (int i = old_size + 1; i < old_size * 2; ++i)
    new_buffer[i].fs = 0;

  running_thread->process->files = new_buffer;
  running_thread->process->files_buffer_size *= 2;

  *handle_out = old_size;
  return FAR_SUCCESS;
}

void syscall_close_file(file_handle_t handle) {
  assert(running_thread != 0)

  struct process_file_info *file =
    get_file_info(running_thread->process, handle);
  if (file != 0) {
    (*file->fs->free_node)(file->fs, file->node);
    file->fs = 0;
  }
}

enum fs_access_result syscall_get_file_size(
  file_handle_t handle, uint64_t *bytes_out) {

  assert(running_thread != 0)
  if (!is_mapped_writable(
        running_thread->process, bytes_out, sizeof(uint64_t)))
    syscall_illegal_args();

  struct process_file_info *file =
    get_file_info(running_thread->process, handle);
  if (file == 0)
    return FAR_BAD_HANDLE;

  *bytes_out = file->stat.bytes;
  return FAR_SUCCESS;
}

enum fs_access_result syscall_read_file(struct read_file_parameter *parameter) {
  assert(running_thread != 0)
  if (!is_mapped_readable(
        running_thread->process, parameter,
        sizeof(struct read_file_parameter)) ||
      !is_mapped_writable(
        running_thread->process, parameter->buffer, parameter->bytes))
    syscall_illegal_args();

  struct process_file_info *file =
    get_file_info(running_thread->process, parameter->handle);
  if (file == 0)
    return FAR_BAD_HANDLE;

  if (parameter->start + parameter->bytes > file->stat.bytes)
    return FAR_OUT_OF_BOUNDS;

  return (*file->fs->read_file)(
    file->fs, file->node, parameter->buffer, parameter->start,
    parameter->bytes);
}

void *syscall_map_pages(uint64_t count) {
  assert(running_thread != 0)

  void *vma = find_free_process_region(running_thread->process, count);
  for (uint64_t i = 0; i < count; ++i) {
    uint64_t pma = take_free_physical_page();
    map_page_for_process(
      running_thread->process, pma, vma + i * 4096, 1, 0, 1);
  }
  return vma;
}
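
//datagram handles work like file handles: indices into a per-process table
//that starts with INITIAL_IPC_DGRAM_HANDLE_COUNT slots and doubles when
//full.  a slot with box == 0 is free.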
#define INITIAL_IPC_DGRAM_HANDLE_COUNT 16

static uint64_t get_free_ipc_dgram_handle(struct process *process) {
  if (process->ipc_dgram_handles == 0) {
    process->ipc_dgram_handles = heap_alloc(
      INITIAL_IPC_DGRAM_HANDLE_COUNT *
      sizeof(struct process_ipc_dgram_handle_info));
    process->ipc_dgram_handles_buffer_size = INITIAL_IPC_DGRAM_HANDLE_COUNT;
    for (int i = 0; i < INITIAL_IPC_DGRAM_HANDLE_COUNT; ++i)
      process->ipc_dgram_handles[i].box = 0;
    return 0;
  }

  for (int i = 0; i < process->ipc_dgram_handles_buffer_size; ++i)
    if (process->ipc_dgram_handles[i].box == 0)
      return i;

  //no free slot: double the handle table.
  int old_size = process->ipc_dgram_handles_buffer_size;
  int new_size = old_size * 2;

  struct process_ipc_dgram_handle_info *new_buffer =
    heap_alloc(new_size * sizeof(struct process_ipc_dgram_handle_info));
  memcpy(
    new_buffer, process->ipc_dgram_handles,
    old_size * sizeof(struct process_ipc_dgram_handle_info));
  heap_dealloc(
    process->ipc_dgram_handles,
    old_size * sizeof(struct process_ipc_dgram_handle_info));

  process->ipc_dgram_handles = new_buffer;
  process->ipc_dgram_handles_buffer_size = new_size;

  for (int i = old_size; i < new_size; ++i)
    new_buffer[i].box = 0;

  return old_size;
}

enum ipc_dgram_result syscall_ipc_create_dgram_receiver(
  const char *address, ipc_dgram_receiver_handle_t *handle_out) {

  assert(running_thread != 0)
  if (!is_mapped_readable_string(running_thread->process, address) ||
      !is_mapped_writable(
        running_thread->process, handle_out,
        sizeof(ipc_dgram_receiver_handle_t)))
    syscall_illegal_args();

  struct ipc_dgram_box *box = get_ipc_dgram_box(address);
  if (box->is_receiver_held)
    return IPR_IN_USE;
  box->is_receiver_held = 1;

  *handle_out = get_free_ipc_dgram_handle(running_thread->process);
  struct process_ipc_dgram_handle_info *info =
    &running_thread->process->ipc_dgram_handles[*handle_out];
  info->box = box;
  info->is_receiver = 1;

  return IPR_SUCCESS;
}

enum ipc_dgram_result syscall_ipc_create_dgram_sender(
  const char *address, ipc_dgram_sender_handle_t *handle_out) {

  assert(running_thread != 0)
  if (!is_mapped_readable_string(running_thread->process, address) ||
      !is_mapped_writable(
        running_thread->process, handle_out,
        sizeof(ipc_dgram_sender_handle_t)))
    syscall_illegal_args();

  struct ipc_dgram_box *box = get_ipc_dgram_box(address);

  *handle_out = get_free_ipc_dgram_handle(running_thread->process);
  struct process_ipc_dgram_handle_info *info =
    &running_thread->process->ipc_dgram_handles[*handle_out];
  info->box = box;
  info->is_receiver = 0;

  return IPR_SUCCESS;
}
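
//receiving: if the caller's buffer is too small, *bytes is set to the size
//of the waiting datagram and IPR_TOO_BIG is returned without consuming it;
//otherwise the datagram is copied out and *bytes is set to its actual size.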
enum ipc_dgram_result syscall_ipc_receive_dgram(
  ipc_dgram_receiver_handle_t handle, void *buffer, int *bytes) {

  assert(running_thread != 0)
  if (!is_mapped_writable(running_thread->process, bytes, sizeof(int)) ||
      !is_mapped_writable(running_thread->process, buffer, *bytes))
    syscall_illegal_args();

  if (running_thread->process->ipc_dgram_handles == 0 ||
      handle >=
        (uint64_t)running_thread->process->ipc_dgram_handles_buffer_size ||
      running_thread->process->ipc_dgram_handles[handle].box == 0 ||
      running_thread->process->ipc_dgram_handles[handle].is_receiver != 1)
    return IPR_BAD_HANDLE;

  struct ipc_dgram_box *box =
    running_thread->process->ipc_dgram_handles[handle].box;

  int actual_bytes = get_ipc_dgram_size(box);
  assert(actual_bytes > 0)

  if (actual_bytes > *bytes) {
    *bytes = actual_bytes;
    return IPR_TOO_BIG;
  }

  receive_ipc_dgram(box, buffer);
  *bytes = actual_bytes;
  return IPR_SUCCESS;
}

enum ipc_dgram_result syscall_ipc_send_dgram(
  ipc_dgram_sender_handle_t handle, const void *data, int bytes) {

  assert(running_thread != 0)
  if (bytes <= 0 ||
      !is_mapped_readable(running_thread->process, data, bytes))
    syscall_illegal_args();

  if (running_thread->process->ipc_dgram_handles == 0 ||
      handle >=
        (uint64_t)running_thread->process->ipc_dgram_handles_buffer_size ||
      running_thread->process->ipc_dgram_handles[handle].box == 0 ||
      running_thread->process->ipc_dgram_handles[handle].is_receiver != 0)
    return IPR_BAD_HANDLE;

  struct ipc_dgram_box *box =
    running_thread->process->ipc_dgram_handles[handle].box;
  return send_ipc_dgram(box, data, bytes);
}