/* Calcite, src/kernel/process.c
 * Copyright 2025 Benji Dial
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program. If not, see <https://www.gnu.org/licenses/>.
 */

#include "framebuffer.h"
#include "scheduler.h"
#include "process.h"
#include "utility.h"
#include "paging.h"
#include "panic.h"
#include "heap.h"
#include "fs.h"

//initializes *process_out with a fresh address space: allocates a p4 and a
//p3 table, maps both into kernel space so they can be edited, points p4[0]
//at the new p3 and p4[511] at the kernel's p3, and clears everything else.
//the process starts with no threads.
void create_process(struct process *process_out) {

  //NOTE(review): the trailing (1, 0) arguments are presumably
  //(writable, executable), mirroring map_page_for_process - confirm in
  //paging.h.
  process_out->p4_physical_base = take_free_physical_page();
  process_out->p4_virtual_base = find_free_kernel_region(4096);
  map_in_kernel_page_table(
    process_out->p4_physical_base,
    process_out->p4_virtual_base, 1, 0);

  process_out->p3_physical_base = take_free_physical_page();
  process_out->p3_virtual_base = find_free_kernel_region(4096);
  map_in_kernel_page_table(
    process_out->p3_physical_base,
    process_out->p3_virtual_base, 1, 0);

  //0x7 = present | writable | user, 0x3 = present | writable (x86-64)
  process_out->p4_virtual_base[0] = process_out->p3_physical_base | 0x7;
  process_out->p4_virtual_base[511] = kernel_p3_physical_address | 0x3;

  for (int i = 1; i < 511; ++i)
    process_out->p4_virtual_base[i] = 0;

  for (int i = 0; i < 512; ++i) {
    process_out->p3_virtual_base[i] = 0;
    process_out->p2_virtual_bases[i] = 0;
    process_out->p1_virtual_bases[i] = 0;
  }

  process_out->n_threads = 0;

}

//maps one 4096-byte physical page at virtual_base in the process's address
//space, creating the p2 and p1 tables on the way if they do not exist yet.
//  physical_base: page-aligned physical address
//  virtual_base: page-aligned, nonzero, below 0x0000008000000000
//  writable / executable: nonzero to allow writes / instruction fetches
//asserts that the page is not already mapped.
void map_page_for_process(
  struct process *process, uint64_t physical_base,
  void *virtual_base, int writable, int executable) {

  assert(physical_base % 4096 == 0)
  uint64_t vma = (uint64_t)virtual_base;
  assert(vma % 4096 == 0)
  assert(vma != 0)
  assert(vma < 0x0000008000000000)

  int p1i = (vma >> 12) & 0x1ff;
  int p2i = (vma >> 21) & 0x1ff;
  int p3i = (vma >> 30) & 0x1ff;

  if (process->p3_virtual_base[p3i] == 0) {
    //no p2 table for this 1 GiB region yet. besides the table itself, we
    //need an array of kernel pointers to the p1 tables underneath it
    //(512 pointers = 4096 bytes).
    uint64_t p2_pma = take_free_physical_page();
    uint64_t *p2_vma = find_free_kernel_region(4096);
    map_in_kernel_page_table(p2_pma, p2_vma, 1, 0);
    process->p3_virtual_base[p3i] = p2_pma | 0x7;
    process->p2_virtual_bases[p3i] = p2_vma;
    process->p1_virtual_bases[p3i] = heap_alloc(4096);
    for (int i = 0; i < 512; ++i) {
      p2_vma[i] = 0;
      process->p1_virtual_bases[p3i][i] = 0;
    }
  }

  if (process->p2_virtual_bases[p3i][p2i] == 0) {
    //no p1 table for this 2 MiB region yet
    uint64_t p1_pma = take_free_physical_page();
    uint64_t *p1_vma = find_free_kernel_region(4096);
    map_in_kernel_page_table(p1_pma, p1_vma, 1, 0);
    process->p2_virtual_bases[p3i][p2i] = p1_pma | 0x7;
    process->p1_virtual_bases[p3i][p2i] = p1_vma;
    for (int i = 0; i < 512; ++i)
      p1_vma[i] = 0;
  }

  assert(process->p1_virtual_bases[p3i][p2i][p1i] == 0)

  //0x5 = present | user, 0x2 = writable, bit 63 = no-execute
  process->p1_virtual_bases[p3i][p2i][p1i] =
    physical_base | 0x5 |
    (writable ? 0x2 : 0x0) |
    (executable ? 0 : 0x8000000000000000);

}

//clears the p1 entry for the page at virtual_base. asserts that the page is
//currently mapped. the physical page is not released, and the p1 / p2
//tables are left in place.
void unmap_page_for_process(
  struct process *process, void *virtual_base) {

  uint64_t vma = (uint64_t)virtual_base;
  assert(vma % 4096 == 0)
  assert(vma != 0)
  assert(vma < 0x0000008000000000)

  int p1i = (vma >> 12) & 0x1ff;
  int p2i = (vma >> 21) & 0x1ff;
  int p3i = (vma >> 30) & 0x1ff;

  assert(
    process->p1_virtual_bases[p3i] &&
    process->p1_virtual_bases[p3i][p2i] &&
    process->p1_virtual_bases[p3i][p2i][p1i])

  process->p1_virtual_bases[p3i][p2i][p1i] = 0;

}

//returns 1 if every page touched by [start, start + length) is mapped
//writable in the process, and 0 otherwise. used to validate pointers that
//user code passes to syscalls.
int is_mapped_writable(struct process *process, void *start, uint64_t length) {

  uint64_t vma_start = (uint64_t)start;
  uint64_t vma_end = vma_start + length;

  //reject ranges that wrap around the address space or reach past the end
  //of user space. without this, the rounding below can wrap vma_end to a
  //small value, the loop never runs, and a kernel pointer is accepted.
  if (vma_end < vma_start || vma_end > 0x0000008000000000)
    return 0;

  vma_start = (vma_start / 4096) * 4096;
  vma_end = ((vma_end - 1) / 4096 + 1) * 4096;

  for (uint64_t vma = vma_start; vma < vma_end; vma += 4096) {

    if (vma == 0 || vma >= 0x0000008000000000)
      return 0;

    int p1i = (vma >> 12) & 0x1ff;
    int p2i = (vma >> 21) & 0x1ff;
    int p3i = (vma >> 30) & 0x1ff;

    if (!process->p1_virtual_bases[p3i] ||
        !process->p1_virtual_bases[p3i][p2i] ||
        !process->p1_virtual_bases[p3i][p2i][p1i] ||
        !(process->p1_virtual_bases[p3i][p2i][p1i] & 0x2))
      return 0;

  }

  return 1;

}

//finds page_count contiguous unmapped pages in the process's address space
//and returns the address of the first one. the search starts at 1 GiB and
//panics if it reaches 0x0000008000000000 without finding room.
void *find_free_process_region(
  struct process *process, uint64_t page_count) {

  //invariant: [start, start + length) is known to be unmapped
  uint64_t start = 512 * 512 * 4096;
  uint64_t length = 0;

  while (1) {

    if (length >= page_count * 4096)
      break;

    uint64_t vma = start + length;
    if (vma >= 0x0000008000000000)
      goto no_mem;

    int p1i = (vma >> 12) & 0x1ff;
    int p2i = (vma >> 21) & 0x1ff;
    int p3i = (vma >> 30) & 0x1ff;

    //the walk advances one page at a time except for the two aligned skips
    //below, so it always lands on the first page of each 1 GiB / 2 MiB
    //region. that is what makes the later table dereferences safe.

    if (p2i == 0 && p1i == 0 && process->p2_virtual_bases[p3i] == 0) {
      //whole 1 GiB region has no p2 table
      length += 512 * 512 * 4096;
      continue;
    }

    if (p1i == 0 && process->p1_virtual_bases[p3i][p2i] == 0) {
      //whole 2 MiB region has no p1 table
      length += 512 * 4096;
      continue;
    }

    if (process->p1_virtual_bases[p3i][p2i][p1i] == 0) {
      length += 4096;
      continue;
    }

    //this page is in use; restart the search just after it
    start = start + length + 4096;
    length = 0;

  }

  return (void *)start;

no_mem:
  panic("process out of virtual memory")

}

//loads the loadable segments of a 64-bit elf file into the process.
//  entry_out: receives the entry point from the elf header
//  fs_info, fs_node: the file to read
//returns 1 on success, and 0 if the file cannot be read or is malformed.
//the elf magic / class fields are not checked. on failure, pages that were
//already mapped stay mapped in the process; the caller is expected to
//destroy it.
int load_elf(
  struct process *process, uint64_t *entry_out,
  const struct fs_info *fs_info, void *fs_node) {

  struct fs_stat stat;
  if ((*fs_info->stat_file)(fs_info, fs_node, &stat) != FAR_SUCCESS)
    return 0;

  //we read header bytes 24 through 57
  if (stat.bytes < 58)
    return 0;

  uint64_t file_bytes = stat.bytes;

  //bytes 24..57 of the header: entry point at 24, program header table
  //offset at 32, program header entry size at 54 and count at 56.
  uint8_t head_part[34];
  if ((*fs_info->read_file)(fs_info, fs_node, head_part, 24, 34) != FAR_SUCCESS)
    return 0;

  *entry_out = *(uint64_t *)head_part;
  uint64_t phead_start = *(uint64_t *)(head_part + 8);
  uint16_t phead_entry_size = *(uint16_t *)(head_part + 30);
  uint16_t phead_entry_count = *(uint16_t *)(head_part + 32);

  //the table must lie inside the file. the product is computed in 64 bits
  //(two uint16_t values would otherwise be multiplied as int and could
  //overflow), and the comparison is arranged so that it cannot wrap.
  uint64_t phead_bytes = (uint64_t)phead_entry_count * phead_entry_size;
  if (phead_start > file_bytes || phead_bytes > file_bytes - phead_start)
    return 0;

  //we read the first six 8-byte fields of every entry
  if (phead_entry_count != 0 && phead_entry_size < 6 * sizeof(uint64_t))
    return 0;

  for (uint16_t i = 0; i < phead_entry_count; ++i) {

    uint64_t entry[6];
    if ((*fs_info->read_file)(
          fs_info, fs_node, entry,
          phead_start + (uint64_t)i * phead_entry_size,
          6 * sizeof(uint64_t)) != FAR_SUCCESS)
      return 0;

    //low half of entry[0] is the segment type (1 = loadable),
    //high half is the flags (bit 0 = execute, bit 1 = write)
    if ((entry[0] & 0xffffffff) != 1)
      continue;

    int executable = (entry[0] >> 32) & 0x1;
    int writable = (entry[0] >> 33) & 0x1;
    uint64_t file_start = entry[1];
    void *virtual_start = (void *)entry[2];
    uint64_t file_length = entry[4];
    uint64_t virtual_length = ((entry[5] - 1) / 4096 + 1) * 4096;

    //the file-backed part must lie inside the file (wrap-safe comparison)
    if (file_start > file_bytes || file_length > file_bytes - file_start)
      return 0;

    if (file_length > virtual_length)
      return 0;

    if ((uint64_t)virtual_start % 4096 != 0 || virtual_length % 4096 != 0)
      return 0;

    //the segment must lie inside user space. map_page_for_process asserts
    //this, and a bad file should fail the load instead of tripping an
    //assert.
    if (virtual_length != 0 &&
        ((uint64_t)virtual_start == 0 ||
         (uint64_t)virtual_start >= 0x0000008000000000 ||
         virtual_length > 0x0000008000000000 - (uint64_t)virtual_start))
      return 0;

    for (uint64_t offset = 0; offset < virtual_length; offset += 4096) {

      uint64_t pma = take_free_physical_page();
      map_page_for_process(
        process, pma, virtual_start + offset, writable, executable);

      //temporarily map the page into kernel space to fill it
      void *kvma = find_free_kernel_region(4096);
      map_in_kernel_page_table(pma, kvma, 1, 0);

      int read_ok = 1;

      if (offset + 4096 <= file_length)
        //page lies entirely inside the file-backed part
        read_ok =
          (*fs_info->read_file)(
            fs_info, fs_node, kvma, file_start + offset, 4096)
          == FAR_SUCCESS;

      else if (offset >= file_length)
        //page lies entirely past the file-backed part
        memzero(kvma, 4096);

      else {
        //last file-backed page: read what is left and zero the rest
        uint64_t tail = file_length & 0xfff;
        read_ok =
          (*fs_info->read_file)(
            fs_info, fs_node, kvma, file_start + offset, tail)
          == FAR_SUCCESS;
        if (read_ok)
          memzero(kvma + tail, 4096 - tail);
      }

      //always drop the temporary mapping, including when the read failed
      unmap_kernel_page(kvma);

      if (!read_ok)
        return 0;

    }

  }

  return 1;

}

//defined in process.asm. enters user mode with:
//  running_thread = value of rbx when we jump here
//  cr3 = value of rbp when we jump here
//  rsp = value of rsp when we jump here
//  rip = rcx = value of r12 when we jump here
//  rflags = r11 = 0x200 (IF)
//  all other registers zeroed
extern uint8_t thread_start;

//creates a process from an elf file, gives it one thread, and queues that
//thread to start at the file's entry point. returns 1 on success, or 0 if
//the file could not be loaded (the half-built process is destroyed).
int start_elf(const struct fs_info *fs_info, void *fs_node) {

  struct process *process = heap_alloc(sizeof(struct process));
  create_process(process);

  uint64_t entry;
  if (!load_elf(process, &entry, fs_info, fs_node)) {
    destroy_process(process);
    return 0;
  }

  struct thread *thread = heap_alloc(sizeof(struct thread));
  create_thread(process, thread);

  //see the comment on thread_start for what each register means
  struct continuation_info ci;
  ci.rip = (uint64_t)&thread_start;
  ci.rbx = (uint64_t)thread;
  ci.rbp = thread->process->p4_physical_base;
  ci.rsp = (uint64_t)thread->stack_top;
  ci.r12 = entry;

  add_ready_continuation(&ci);
  return 1;

}

//releases everything owned by the process: every mapped physical page
//(except framebuffer pages), all of its page tables, and the process
//structure itself. the pointer must not be used afterwards.
void destroy_process(struct process *process) {

  for (int p3i = 0; p3i < 512; ++p3i)
    if (process->p3_virtual_base[p3i]) {

      for (int p2i = 0; p2i < 512; ++p2i)
        if (process->p2_virtual_bases[p3i][p2i]) {

          for (int p1i = 0; p1i < 512; ++p1i) {

            uint64_t entry = process->p1_virtual_bases[p3i][p2i][p1i];

            //an empty slot has no page behind it. without this check,
            //physical page 0 would be marked free once per empty slot.
            if (entry == 0)
              continue;

            uint64_t pma = entry & 0x7ffffffffffff000;

            //framebuffer pages are mapped by syscall_map_framebuffer and
            //were never taken from the free page pool
            if (pma >= fb_physical_base &&
                pma < fb_physical_base + fb_pitch * fb_height)
              continue;

            mark_physical_memory_free(pma, 4096);

          }

          unmap_and_free_kernel_page(process->p1_virtual_bases[p3i][p2i]);

        }

      unmap_and_free_kernel_page(process->p2_virtual_bases[p3i]);
      heap_dealloc(process->p1_virtual_bases[p3i], 4096);

    }

  unmap_and_free_kernel_page(process->p3_virtual_base);
  unmap_and_free_kernel_page(process->p4_virtual_base);
  heap_dealloc(process, sizeof(struct process));

}

//destroys a thread and frees its structure. if it was the last thread of
//its process, the whole process is destroyed; otherwise only the thread's
//stack is unmapped.
void destroy_thread(struct thread *thread) {

  assert(thread->process->n_threads >= 1)

  if (thread->process->n_threads == 1)
    destroy_process(thread->process);

  else {
    --thread->process->n_threads;
    //NOTE(review): this only clears the page table entries. the physical
    //pages behind the stack are not handed back to the allocator, and no
    //tlb invalidation is visible here - freeing the pages should probably
    //go together with one.
    for (void *p = thread->stack_bottom; p < thread->stack_top; p += 4096)
      unmap_page_for_process(thread->process, p);
  }

  heap_dealloc(thread, sizeof(struct thread));

}

//size in bytes of the stack given to every new thread (16 MiB)
#define INITIAL_STACK_SIZE (16 << 20)

//initializes *thread_out as a new thread of the process: finds a free
//region in the process's address space, backs it with zeroed, writable,
//non-executable pages to serve as the stack, and increments the process's
//thread count.
void create_thread(struct process *process, struct thread *thread_out) {

  //TODO: allocate stack as needed on page faults, have guard pages, etc.

  void *stack_bottom_vma =
    find_free_process_region(process, INITIAL_STACK_SIZE / 4096);

  for (int i = 0; i < INITIAL_STACK_SIZE / 4096; ++i) {

    uint64_t pma = take_free_physical_page();
    map_page_for_process(
      process, pma, stack_bottom_vma + i * 4096, 1, 0);

    //temporarily map the page into kernel space to zero it
    void *kvma = find_free_kernel_region(4096);
    map_in_kernel_page_table(pma, kvma, 1, 0);
    memzero(kvma, 4096);
    unmap_kernel_page(kvma);

  }

  thread_out->process = process;
  thread_out->stack_bottom = stack_bottom_vma;
  thread_out->stack_top = stack_bottom_vma + INITIAL_STACK_SIZE;

  ++process->n_threads;

}

//the thread whose user code is currently running, or 0 if there is none
struct thread *running_thread = 0;

//syscall: destroys the calling thread and switches to the next ready
//continuation. does not return.
[[noreturn]] void syscall_end_thread() {
  assert(running_thread != 0)
  destroy_thread(running_thread);
  running_thread = 0;
  resume_next_continuation();
}

//syscall: maps the framebuffer, writable and non-executable, into the
//calling process and describes it in *info_out. panics if info_out does
//not point to writable memory of the calling process.
void syscall_map_framebuffer(struct framebuffer_info *info_out) {

  if (!is_mapped_writable(
        running_thread->process, info_out, sizeof(struct framebuffer_info)))
    panic("bad syscall");

  uint64_t pages_needed = (fb_pitch * fb_height - 1) / 4096 + 1;

  void *base =
    find_free_process_region(running_thread->process, pages_needed);

  for (uint64_t i = 0; i < pages_needed; ++i)
    map_page_for_process(
      running_thread->process, fb_physical_base + i * 4096,
      base + i * 4096, 1, 0);

  info_out->fb_base = base;
  info_out->fb_pitch = fb_pitch;
  info_out->fb_height = fb_height;
  info_out->fb_width = fb_width;

}