From ab9f6b53d563b719f86ea9d4a74953a9f2ed3057 Mon Sep 17 00:00:00 2001 From: Ebrahim Aleem Date: Tue, 10 Feb 2026 16:31:15 -0800 Subject: [PATCH 1/4] minimal slab allocator --- Makefile | 2 +- boot/multiboot2/boot.S | 10 +- boot/multiboot2/init.c | 10 +- drivers/acpica_osl/osl.c | 50 +- drivers/hpet/hpet_init.c | 2 +- drivers/include/acpica/platform/acmodulos.h | 2 +- kernel/core/alloc.c | 263 ++++++++--- kernel/core/clock_src.c | 2 +- kernel/core/cpu_instr.S | 1 + kernel/core/kentry.c | 4 +- kernel/core/lock.S | 5 + kernel/core/mm.c | 431 ++++++++++++++++-- kernel/core/mm_init.c | 226 --------- kernel/core/paging.c | 298 +++--------- kernel/core/proc_data.S | 43 ++ kernel/include/core/alloc.h | 36 +- kernel/include/core/lock.h | 1 + kernel/include/core/mm.h | 51 +-- kernel/include/core/paging.h | 13 +- .../include/core/{mm_init.h => proc_data.h} | 33 +- kernel/include/lib/mergesort.h | 23 + kernel/lib/mergesort.c | 72 +++ 22 files changed, 926 insertions(+), 652 deletions(-) delete mode 100644 kernel/core/mm_init.c create mode 100644 kernel/core/proc_data.S rename kernel/include/core/{mm_init.h => proc_data.h} (61%) create mode 100644 kernel/include/lib/mergesort.h create mode 100644 kernel/lib/mergesort.c diff --git a/Makefile b/Makefile index c7b2ca2..cc357d4 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ # Debug options #export DEBUG = 1 -#export DEBUG_LOGGING = 1 +export DEBUG_LOGGING = 1 # Global options diff --git a/boot/multiboot2/boot.S b/boot/multiboot2/boot.S index 2cc6830..c5be9c5 100644 --- a/boot/multiboot2/boot.S +++ b/boot/multiboot2/boot.S @@ -288,9 +288,10 @@ gdt_ptr64.addr: kernel_pml4: PML4: .quad PDPT_0 + KERNEL_PAGE_FLAGS -.rept 0x200 - 2 +.rept 0x200 - 3 .quad 0 .endr + .quad PDPT_510 + KERNEL_PAGE_FLAGS .quad PDPT_511 + KERNEL_PAGE_FLAGS PDPT_0: @@ -300,6 +301,13 @@ PDPT_0: .quad 0 .endr +PDPT_510: +.set i, 0 +.rept 0x200 + .quad (i * PAGING_1GIB) + KERNEL_PAGE_FLAGS + PAGE_SZ + .set i, i+1 +.endr + PDPT_511: .rept 0x200 - 2 .quad 0 
diff --git a/boot/multiboot2/init.c b/boot/multiboot2/init.c index b77d7c1..47131c8 100644 --- a/boot/multiboot2/init.c +++ b/boot/multiboot2/init.c @@ -20,9 +20,10 @@ #include #include -#include +#include #include #include +#include #include @@ -46,6 +47,9 @@ #define VIDEO_XRGB8888_BLUEMASK 0x08 #define VIDEO_XRGB8888_BPP 32 +static struct proc_data_t bsp_proc_data; +static struct proc_data_t* bsp_proc_data_ptr; + struct mb2_tag_memmap_entry_t { uint64_t base; uint64_t length; @@ -134,6 +138,10 @@ static void next_segment(uint64_t* handle, struct mem_segment_t* seg) { void multiboot2_init(struct mb2_info_t* info) { + bsp_proc_data_ptr = &bsp_proc_data; + proc_data_ptr = &bsp_proc_data_ptr; + proc_data_set_id(0); + proc_data_get()->arb_id = ++proc_arb_id; logging_init(); #ifdef SERIAL diff --git a/drivers/acpica_osl/osl.c b/drivers/acpica_osl/osl.c index 70df0f8..3c35290 100644 --- a/drivers/acpica_osl/osl.c +++ b/drivers/acpica_osl/osl.c @@ -35,15 +35,15 @@ #include #include #include +#include +#include ACPI_STATUS AcpiOsInitialize(void) { return AE_OK; } ACPI_THREAD_ID AcpiOsGetThreadId(void) { - //TODO: implement - //logging_log_warning("Call to unfinished AcpiOsGetThreadId"); - return 1; + return proc_data_get()->arb_id; } void ACPI_INTERNAL_XFACE AcpiOsPrintf(const char* fmt, ...) 
{ @@ -62,9 +62,7 @@ void* AcpiOsAllocate(ACPI_SIZE size) { } void AcpiOsFree(void* ptr) { - //TODO: implement - //logging_log_warning("Call to unfinished AcpiOsFree 0x%lx", ptr); - (void)ptr; + kfree(ptr); } ACPI_STATUS AcpiOsCreateSemaphore(UINT32 cap, UINT32 init, ACPI_SEMAPHORE* handle) { @@ -101,30 +99,26 @@ ACPI_STATUS AcpiOsSignalSemaphore(ACPI_SEMAPHORE handle, UINT32 units) { } ACPI_STATUS AcpiOsCreateLock(ACPI_SPINLOCK* handle) { - //TODO: implement - *handle = (void*)1; - //logging_log_warning("Call to unfinished AcpiOsCreateLock"); - return AE_OK; + *handle = kmalloc(sizeof(uint8_t)); + if (*handle) { + lock_init(*handle); + return AE_OK; + } + return AE_ERROR; } void AcpiOsDeleteLock(ACPI_SPINLOCK handle) { - //TODO: implement - //logging_log_warning("Call to unfinished AcpiOsCreateLock"); - (void)handle; + kfree(handle); } ACPI_CPU_FLAGS AcpiOsAcquireLock(ACPI_SPINLOCK handle) { - //TODO: implement - //logging_log_warning("Call to unfinished AcpiOsAcquireLock"); - (void)handle; + lock_acquire(handle); return 0; } void AcpiOsReleaseLock(ACPI_SPINLOCK handle, ACPI_CPU_FLAGS flg) { - //TODO: implement - //logging_log_warning("Call to unfinished AcpiOsReleaseLock"); - (void)handle; (void)flg; + lock_release(handle); } ACPI_STATUS AcpiOsReadMemory(ACPI_PHYSICAL_ADDRESS paddr, UINT64* val, UINT32 width) { @@ -238,15 +232,10 @@ void* AcpiOsMapMemory(ACPI_PHYSICAL_ADDRESS paddr, ACPI_SIZE len) { const uint64_t page_base = paddr & PAGE_BASE_MASK; const uint64_t adj = paddr - page_base; len += adj; - uint64_t order = mm_lowest_order(len); - if (order == (uint64_t)-1) { - logging_log_error("Could not allocate contigious virtual block of 0x%lx", (uint64_t)len); - panic(PANIC_NO_MEM); - } - const uint64_t vaddr = mm_alloc_dv((enum mm_order_t)order); + const uint64_t vaddr = mm_alloc_v(len); if (!vaddr) { logging_log_error("Could not allocate contigious virtual block of 0x%lx", (uint64_t)len); - panic(PANIC_NO_MEM); + return 0; } for (uint64_t i = 0; i < len; i += 
PAGE_SIZE_4K) { @@ -257,9 +246,12 @@ void* AcpiOsMapMemory(ACPI_PHYSICAL_ADDRESS paddr, ACPI_SIZE len) { } void AcpiOsUnmapMemory(void* vaddr, ACPI_SIZE len) { - //logging_log_warning("Call to unfinished AcpiOsUnmapMemory"); - (void)vaddr; - (void)len; + const uint64_t page_base = (uint64_t)vaddr & PAGE_BASE_MASK; + const uint64_t adj = (uint64_t)vaddr - page_base; + len += adj; + for (uint64_t i = 0; i < len; i += PAGE_SIZE_4K) { + paging_unmap(page_base + i, PAGE_4K); + } } ACPI_STATUS AcpiOsTableOverride(ACPI_TABLE_HEADER* existing, ACPI_TABLE_HEADER** override) { diff --git a/drivers/hpet/hpet_init.c b/drivers/hpet/hpet_init.c index 7d10d7e..71425f1 100644 --- a/drivers/hpet/hpet_init.c +++ b/drivers/hpet/hpet_init.c @@ -75,7 +75,7 @@ void hpet_init(void) { for (uint8_t i = 0; i < 8; i++) { if (hpet_reg_bases[i]) { logging_log_debug("Found HPET register base @ 0x%lx", hpet_reg_bases[i]); - const uint64_t temp = mm_alloc_dv(MM_ORDER_4K); + const uint64_t temp = mm_alloc_v(PAGE_SIZE_4K); paging_map( temp, (uint64_t)hpet_reg_bases[i], diff --git a/drivers/include/acpica/platform/acmodulos.h b/drivers/include/acpica/platform/acmodulos.h index 3d22630..dad0fd3 100644 --- a/drivers/include/acpica/platform/acmodulos.h +++ b/drivers/include/acpica/platform/acmodulos.h @@ -227,6 +227,6 @@ static uint8_t _stub(void) { } while (0) #define ACPI_SEMAPHORE uint64_t* -#define ACPI_SPINLOCK uint64_t* +#define ACPI_SPINLOCK uint8_t* #endif /* DRIVERS_ACPICA_PLATFORM_ACMODULOS_H */ diff --git a/kernel/core/alloc.c b/kernel/core/alloc.c index 20f4d8e..d229f3d 100644 --- a/kernel/core/alloc.c +++ b/kernel/core/alloc.c @@ -19,100 +19,221 @@ #include #include +#include #include -#include #include -#include +#include + +#define SLAB_SIZE_1 0x1 +#define SLAB_SIZE_2 0x2 +#define SLAB_SIZE_4 0x4 +#define SLAB_SIZE_8 0x8 +#define SLAB_SIZE_16 0x10 +#define SLAB_SIZE_32 0x20 +#define SLAB_SIZE_64 0x40 +#define SLAB_SIZE_128 0x80 +#define SLAB_SIZE_256 0x100 +#define SLAB_SIZE_512 0x200 
+#define SLAB_SIZE_1K 0x400 +#define SLAB_SIZE_2K 0x800 +#define SLAB_SIZE_4K 0x1000 +#define SLAB_SIZE_8K 0x2000 +#define SLAB_SIZE_16K 0x4000 +#define SLAB_SIZE_32K 0x8000 +#define SLAB_SIZE_64K 0x10000 +#define SLAB_SIZE_128K 0x20000 +#define SLAB_SIZE_256K 0x40000 +#define SLAB_SIZE_512K 0x80000 +#define SLAB_SIZE_1M 0x100000 +#define SLAB_SIZE_2M 0x200000 + +struct cache_node_t { + uint64_t base; + struct cache_node_t* next; +}; -#include +static struct cache_node_t* create_node_block(struct cache_node_t** tail) { + uint64_t i; -#define ARENA_SIZE 0x200000 + struct cache_node_t* list = (struct cache_node_t*)paging_ident(mm_alloc_p2m()); -#define EXTENT_FREE 0 -#define EXTENT_USED 1 + for (i = 0; i < SIZE_2M / sizeof(struct cache_node_t) - 1; i++) { + list[i].next = &list[i+1]; + *tail = &list[i+1]; + } -struct arena_t { - struct arena_t* next; - uint64_t* hint; - uint64_t left; - uint64_t arena[(ARENA_SIZE - - sizeof(struct arena_t*) - - sizeof(uint64_t) - - sizeof(uint64_t)) / sizeof(uint64_t)]; -}; + return list; +} -_Static_assert(sizeof(struct arena_t) == ARENA_SIZE, "arena must be 2M"); +static struct cache_node_t* alloc_cache_node(void) { + struct proc_data_t* data = proc_data_get(); + struct cache_node_t* t; -static struct arena_t* base; -static struct arena_t* head; + lock_acquire(&data->alloc_caches.lock); + t = data->alloc_caches.avl; + if (t) { + data->alloc_caches.avl = t->next; + } + lock_release(&data->alloc_caches.lock); -void alloc_init(void) { - base = (struct arena_t*)mm_alloc_dv(MM_ORDER_2M); - head = base; - paging_early_map_2m((uint64_t)base, mm_early_alloc_2m(), PAGE_PRESENT | PAGE_RW); - - logging_log_debug("New arena @ 0x%lX", head); - - // preallocate prevents mm/alloc deadlock - kmemset(base, 0, ARENA_SIZE); - base->next = (struct arena_t*)mm_alloc_dv(MM_ORDER_2M); - paging_early_map_2m((uint64_t)base->next, mm_early_alloc_2m(), PAGE_PRESENT | PAGE_RW); - base->left = sizeof(base->arena); - base->hint = &base->arena[0]; + if (t) { + 
return t; + } + + struct cache_node_t* tail; + t = create_node_block(&tail); + if (t) { + lock_acquire(&data->alloc_caches.lock); + tail->next = data->alloc_caches.avl; + data->alloc_caches.avl = t->next; + lock_release(&data->alloc_caches.lock); + } + + return t; } -//TODO: allow chosing arena -void* kmalloc(size_t size) { - // TODO: reclaim freed - - // round to 8 - if (size % sizeof(uint64_t) != 0) { - size += sizeof(uint64_t) - (size % sizeof(uint64_t)); +static enum slab_t min_slab(size_t size) { + if (size <= SLAB_SIZE_1) { + return SLAB_1; + } + if (size <= SLAB_SIZE_2) { + return SLAB_2; + } + if (size <= SLAB_SIZE_4) { + return SLAB_4; + } + if (size <= SLAB_SIZE_8) { + return SLAB_8; + } + if (size <= SLAB_SIZE_16) { + return SLAB_16; + } + if (size <= SLAB_SIZE_32) { + return SLAB_32; + } + if (size <= SLAB_SIZE_64) { + return SLAB_64; + } + if (size <= SLAB_SIZE_128) { + return SLAB_128; + } + if (size <= SLAB_SIZE_256) { + return SLAB_256; + } + if (size <= SLAB_SIZE_512) { + return SLAB_512; + } + if (size <= SLAB_SIZE_1K) { + return SLAB_1K; + } + if (size <= SLAB_SIZE_2K) { + return SLAB_2K; + } + if (size <= SLAB_SIZE_4K) { + return SLAB_4K; } + if (size <= SLAB_SIZE_8K) { + return SLAB_8K; + } + if (size <= SLAB_SIZE_16K) { + return SLAB_16K; + } + if (size <= SLAB_SIZE_32K) { + return SLAB_32K; + } + if (size <= SLAB_SIZE_64K) { + return SLAB_64K; + } + if (size <= SLAB_SIZE_128K) { + return SLAB_128K; + } + if (size <= SLAB_SIZE_256K) { + return SLAB_256K; + } + if (size <= SLAB_SIZE_512K) { + return SLAB_512K; + } + if (size <= SLAB_SIZE_1M) { + return SLAB_1M; + } + if (size <= SLAB_SIZE_2M) { + return SLAB_2M; + } + return SLAB_MAX; +} + +static struct cache_node_t* create_cache(enum slab_t slab, struct cache_node_t** tail) { + uint64_t i; + + uint64_t slab_base = paging_ident(mm_alloc_p2m()); - // TODO: check if hint is free - if ((uint64_t)head->hint + size >= (uint64_t)head + ARENA_SIZE) { - head = head->next; - logging_log_debug("New 
arena @ 0x%lX", head); - kmemset(head, 0, ARENA_SIZE); - head->next = (struct arena_t*)mm_alloc_dv(MM_ORDER_2M); - paging_map((uint64_t)head->next, mm_alloc(MM_ORDER_2M), PAGE_PRESENT | PAGE_RW, PAGE_2M); - head->left = sizeof(head->arena); - head->hint = &head->arena[0]; + struct cache_node_t* cache = 0, * t; + + for (i = 0; i < SIZE_2M; i += (1 << (uint64_t)slab)) { + t = alloc_cache_node(); + if (!t) { + break; + } + t->next = cache; + t->base = i + slab_base; + if (!cache) { + *tail = t; + } + cache = t; } - void* const ret = &head->hint[1]; + return cache; +} + +void alloc_init(void) { + struct proc_data_t* data = proc_data_get(); + uint64_t i; - head->hint[0] = EXTENT_USED | size; - head->hint = (uint64_t*)((uint64_t)&head->hint[1] + size); - head->left -= size + sizeof(uint64_t); + data->alloc_caches.avl = 0; + lock_init(&data->alloc_caches.lock); - return ret; + for (i = 0; i < SLAB_MAX; i++) { + data->alloc_caches.slabs[i] = 0; + } } -void* early_kmalloc(size_t size) { - // TODO: reclaim freed - - // round to 8 - if (size % sizeof(uint64_t) != 0) { - size += sizeof(uint64_t) - (size % sizeof(uint64_t)); +void* kmalloc(size_t size) { + enum slab_t slab = min_slab(size); + if (slab == SLAB_MAX) { + return 0; } - // TODO: check if hint is free - if ((uint64_t)head->hint + size >= (uint64_t)head + ARENA_SIZE) { - head = head->next; - kmemset(head, 0, ARENA_SIZE); - head->next = (struct arena_t*)mm_alloc_dv(MM_ORDER_2M); - paging_early_map_2m((uint64_t)head->next, mm_early_alloc_2m(), PAGE_PRESENT | PAGE_RW); - head->left = sizeof(head->arena); - head->hint = &head->arena[0]; + uint64_t ret; + struct proc_data_t* data = proc_data_get(); + lock_acquire(&data->alloc_caches.lock); + struct cache_node_t* cache = data->alloc_caches.slabs[slab], * tail; + if (cache) { + ret = cache->base; + data->alloc_caches.slabs[slab] = cache->next; + + //TODO: track cache for freeing + lock_release(&data->alloc_caches.lock); } + else { + lock_release(&data->alloc_caches.lock); + 
cache = create_cache(slab, &tail); + if (!cache) { + return 0; + } + + lock_acquire(&data->alloc_caches.lock); + tail->next = data->alloc_caches.slabs[slab]; + data->alloc_caches.slabs[slab] = cache->next; - void* const ret = &head->hint[1]; + ret = cache->base; - head->hint[0] = EXTENT_USED | size; - head->hint = (uint64_t*)((uint64_t)&head->hint[1] + size); - head->left -= size + sizeof(uint64_t); + //TODO: track cache for freeing + lock_release(&data->alloc_caches.lock); + } + + return (void*)ret; +} - return ret; +void kfree(void* ptr) { + (void)ptr; } diff --git a/kernel/core/clock_src.c b/kernel/core/clock_src.c index 0cd2752..7d72d69 100644 --- a/kernel/core/clock_src.c +++ b/kernel/core/clock_src.c @@ -48,7 +48,7 @@ struct clock_src_t* clock_src_alloc(void) { if (clocks) { source = clocks->clock; clocks = clocks->next; - //TODO: free clock + kfree(clocks); } return source; diff --git a/kernel/core/cpu_instr.S b/kernel/core/cpu_instr.S index 5edaa40..2b5f773 100644 --- a/kernel/core/cpu_instr.S +++ b/kernel/core/cpu_instr.S @@ -43,5 +43,6 @@ ret .globl cpu_halt_loop cpu_halt_loop: +cli hlt jmp cpu_halt_loop diff --git a/kernel/core/kentry.c b/kernel/core/kentry.c index 77fdaa5..b7087dc 100644 --- a/kernel/core/kentry.c +++ b/kernel/core/kentry.c @@ -15,7 +15,6 @@ * along with this program. If not, see */ -#include "core/clock_src.h" #include #include @@ -25,6 +24,7 @@ #include #include #include +#include #include #include @@ -34,8 +34,6 @@ struct boot_context_t boot_context; -extern void test_tt(void); - void kentry(void) { logging_log_debug("Kernel Entry"); logging_log_debug("TSS and IDT init"); diff --git a/kernel/core/lock.S b/kernel/core/lock.S index 80cfa68..3f04ca8 100644 --- a/kernel/core/lock.S +++ b/kernel/core/lock.S @@ -15,6 +15,11 @@ * along with this program. 
If not, see */ +.globl lock_init +lock_init: +movq $0, (%rdi) +ret + .globl lock_acquire lock_acquire: movq $1, %rax diff --git a/kernel/core/mm.c b/kernel/core/mm.c index 6e3ee23..8cc97ba 100644 --- a/kernel/core/mm.c +++ b/kernel/core/mm.c @@ -20,78 +20,425 @@ #include #include -#include +#include +#include +#include -#define PAGE_SIZE 0x1000 -#define PD_ENTRY_SIZE 0x200000 -#define PDPT_ENTRY_SIZE 0x40000000 +#include -#define FRAME_USED 0x0 -#define FRAME_FREE 0x1 +#define PAGE_4K_MASK 0xFFFFFFFFFFFFF000 +#define SIZE_GIB (1024 * 1024 * 1024) -#define PV_ALLOC_ALIGN 0x200000 +#define MAX_BLOCKS 64 -uint64_t page_frames_num; -struct page_frame_t* page_frames; +struct frame_list_t { + uint64_t base; + uint64_t limit; + struct frame_list_t* next; +}; -uint64_t virt_limit; // pv start, alloc backwards -uint64_t mem_limit; +extern uint8_t _kernel_pend; -struct mm_order_entry_t order_entries[MM_MAX_ORDER]; +static void (*early_first_segment)(uint64_t* handle); +static void (*early_next_segment)(uint64_t* handle, struct mem_segment_t* seg); +static uint64_t kernel_limit; -static uint64_t dv_base; +static struct frame_list_t init_v_frame; -uint64_t mm_alloc(enum mm_order_t order) { +static struct frame_list_t* p_list; +static struct frame_list_t* v_list; + +static struct frame_list_t* p_2m_list; +static struct frame_list_t* v_2m_list; + +static uint8_t p_lock; +static uint8_t v_lock; + +static uint8_t p_2m_lock; +static uint8_t v_2m_lock; + +static struct frame_list_t init_p_array[3 * MAX_BLOCKS]; + +static uint64_t mm_alloc(size_t size, struct frame_list_t** list, void** tofree) { + struct frame_list_t* prev = 0; uint64_t ret; + *tofree = 0; + + for (struct frame_list_t* i = *list; i != 0; i = i->next) { + if (i->limit == size) { + if (prev) { + prev->next = i->next; + ret = i->base; + *tofree = i; + return ret; + } + + ret = i->base; + *list = i->next; + return ret; + } + if (i->limit > size) { + ret = i->base; + i->base += size; + i->limit -= size; + return ret; + } 
+ } + + logging_log_error("Out of contigious memory"); + return 0; +} + +void mm_init( + void (*first_segment)(uint64_t* handle), + void (*next_segment)(uint64_t* handle, struct mem_segment_t* seg)) { + + kernel_limit = (uint64_t)&_kernel_pend; + early_first_segment = first_segment; + early_next_segment = next_segment; + + // initial vmem is always 2m aligned + v_2m_list = &init_v_frame; + init_v_frame.base = CANON_HIGH; + init_v_frame.limit = VIRTUAL_LIMIT - CANON_HIGH + 1; + + v_list = 0; + p_list = 0; + p_2m_list = 0; + + lock_init(&p_lock); + lock_init(&v_lock); + + lock_init(&p_2m_lock); + lock_init(&v_2m_lock); + + // find memory limit + uint64_t mem_limit = 0; + uint64_t blocks = 0; + + uint64_t handle; + struct mem_segment_t seg; + + first_segment(&handle); + for (early_next_segment(&handle, &seg); seg.size || seg.base; next_segment(&handle, &seg)) { + logging_log_debug("Memory segment 0x%lX (base) 0x%lX (size) 0x%lX (type)", + (uint64_t)seg.base, (uint64_t)seg.size, (uint64_t)seg.type); + if (seg.type == MEM_AVL && seg.base + seg.size > mem_limit) { + mem_limit = seg.base + seg.size; + blocks++; + } + } + + logging_log_info("Detected 0x%lX bytes (0x%lX GiB) of memory across %ld blocks", + mem_limit, (uint64_t)(mem_limit / SIZE_GIB), blocks); + + logging_log_debug("Initializing heap allocator"); + alloc_init(); + logging_log_debug("Heap allocator init done"); + + logging_log_debug("Initializing paging"); + paging_init(); + logging_log_debug("Paging init done"); + + struct frame_list_t* init_p = &init_p_array[0]; + + uint64_t skip = 0; + if (blocks > MAX_BLOCKS) { + skip = blocks - MAX_BLOCKS; // prefer later blocks as they are larger + } + uint64_t rem; + first_segment(&handle); + for (early_next_segment(&handle, &seg); seg.size || seg.base; ) { + if (seg.type != MEM_AVL || seg.size < PAGE_SIZE_4K || skip) { + goto next_seg; + } + + if (skip) { + skip--; + } + + if (seg.base <= kernel_limit) { + seg.base += PAGE_SIZE_4K; + seg.size -= PAGE_SIZE_4K; + 
continue; + } + + // align base to 4K + rem = seg.base % PAGE_SIZE_4K; + + if (rem) { + seg.base += PAGE_SIZE_4K - rem; + seg.size -= PAGE_SIZE_4K - rem; + } + + // try 2m first + if (seg.size >= 2 * SIZE_2M) { + // pre padding + rem = seg.base % SIZE_2M ; + uint64_t base = seg.base; + if (rem) { + init_p->base = seg.base; + init_p->limit = SIZE_2M - rem; + init_p->next = p_list; + p_list = init_p++; + + base += SIZE_2M - rem; + } + + // post padding + rem = seg.size % SIZE_2M ; + uint64_t limit = seg.size; + if (rem > PAGE_SIZE_4K) { + init_p->base = seg.base + seg.size - rem; + init_p->limit = rem & PAGE_4K_MASK; + init_p->next = p_list; + p_list = init_p++; + + limit -= rem & PAGE_4K_MASK; + } + + init_p->base = base; + init_p->limit = limit; + init_p->next = p_2m_list; + p_2m_list = init_p++; + goto next_seg; + } + + // otherwise add 4K + init_p->base = seg.base; + init_p->limit = seg.size & PAGE_4K_MASK; + init_p->next = p_list; + p_list = init_p++; + +next_seg: + next_segment(&handle, &seg); + } +} - if (order_entries[order].free == 0) { - // TODO: split - return mm_alloc(order + 1); +uint64_t mm_alloc_p(size_t size) { + if (size % PAGE_SIZE_4K) { + size += PAGE_SIZE_4K - (size % PAGE_SIZE_4K); } - // TODO: set used - ret = order_entries[order].free->base; - order_entries[order].free = order_entries[order].free->next; + void* tofree; + lock_acquire(&p_lock); + if (!p_list) { + lock_release(&p_lock); + lock_acquire(&p_2m_lock); + if (!p_2m_list) { + logging_log_error("Out of physical memory"); + return 0; + } + struct frame_list_t* temp = p_2m_list; + p_2m_list = p_2m_list->next; + lock_release(&p_2m_lock); + lock_acquire(&p_lock); + temp->next = p_list; + p_list = temp; + } + const uint64_t ret = mm_alloc(size, &p_list, &tofree); + lock_release(&p_lock); + if (tofree) { + kfree(tofree); + } return ret; } -uint64_t mm_alloc_pv(size_t size) { - if (size % PV_ALLOC_ALIGN != 0) { - size += PV_ALLOC_ALIGN - (size % PV_ALLOC_ALIGN); +uint64_t mm_alloc_v(size_t 
size) { + if (size % PAGE_SIZE_4K) { + size += PAGE_SIZE_4K - (size % PAGE_SIZE_4K); } - virt_limit -= size; - return virt_limit; + void* tofree; + lock_acquire(&v_lock); + if (!v_list) { + lock_release(&v_lock); + lock_acquire(&v_2m_lock); + if (!v_2m_list) { + logging_log_error("Out of virtual memory"); + return 0; + } + struct frame_list_t* temp = v_2m_list; + v_2m_list = v_2m_list->next; + lock_release(&v_2m_lock); + lock_acquire(&v_lock); + temp->next = v_list; + v_list = temp; + } + const uint64_t ret = mm_alloc(size, &v_list, &tofree); + lock_release(&v_lock); + if (tofree) { + kfree(tofree); + } + + return ret; } -void mm_init_dv(void) { - // TODO: create a linked list of free dvs starting from mem_limit - dv_base = mem_limit; +uint64_t mm_alloc_p2m(void) { + void* tofree; + lock_acquire(&p_2m_lock); + const uint64_t ret = mm_alloc(SIZE_2M , &p_2m_list, &tofree); + lock_release(&p_2m_lock); + + if (tofree) { + kfree(tofree); + } + + return ret; } -uint64_t mm_alloc_dv(enum mm_order_t order) { - const uint64_t block = (uint64_t)PAGE_SIZE << (uint64_t)order; +uint64_t mm_alloc_v2m(void) { + void* tofree; + lock_acquire(&v_2m_lock); + const uint64_t ret = mm_alloc(SIZE_2M , &v_2m_list, &tofree); + lock_release(&v_2m_lock); - if (dv_base % block != 0) { - dv_base += block - (dv_base % block); + if (tofree) { + kfree(tofree); } - dv_base += block; - return dv_base - block; + return ret; +} + +void mm_free_p(uint64_t addr, size_t size) { + struct frame_list_t* prepend = kmalloc(sizeof(struct frame_list_t)); + prepend->base = addr; + prepend->limit = size; + lock_acquire(&p_lock); + prepend->next = p_list; + p_list = prepend; + lock_release(&p_lock); +} + +void mm_free_v(uint64_t addr, size_t size) { + struct frame_list_t* prepend = kmalloc(sizeof(struct frame_list_t)); + prepend->base = addr; + prepend->limit = size; + lock_acquire(&v_lock); + prepend->next = v_list; + v_list = prepend; + lock_release(&v_lock); +} + +void mm_free_p2m(uint64_t addr) { + struct 
frame_list_t* prepend = kmalloc(sizeof(struct frame_list_t)); + prepend->base = addr; + prepend->limit = SIZE_2M ; + lock_acquire(&p_2m_lock); + prepend->next = p_2m_list; + p_2m_list = prepend; + lock_release(&p_2m_lock); } -uint64_t mm_lowest_order(size_t size) { - uint8_t order = 64 - (uint8_t)__builtin_clzll(size); - if (order < 12) { - return MM_ORDER_4K; +void mm_free_v2m(uint64_t addr) { + struct frame_list_t* prepend = kmalloc(sizeof(struct frame_list_t)); + prepend->base = addr; + prepend->limit = SIZE_2M ; + lock_acquire(&v_2m_lock); + prepend->next = v_2m_list; + v_2m_list = prepend; + lock_release(&v_2m_lock); +} + +static void** sort_next(void* _node) { + struct frame_list_t* node = _node; + return (void**)&node->next; +} + +static uint64_t sort_value(void* _node) { + struct frame_list_t* node = _node; + return node->base; +} + +static void defrag( + struct frame_list_t** list, + struct frame_list_t** m_list, + uint8_t* lock, + uint8_t* m_lock, + struct frame_list_t** tofree) { + + struct frame_list_t* promote = 0; + struct frame_list_t* i, * t, *prev = 0; + + lock_acquire(m_lock); + lock_acquire(lock); + *list = mergesort_ll_inplace_ul(*list, sort_next, sort_value); + + for (i = *list; i && i->next; ) { + if (i->base + i->limit == i->next->base) { + t = i->next; + i->next = t->next; + i->limit += t->limit; + + if (i->limit >= SIZE_2M && i->base % SIZE_2M == 0) { + t->base = i->base; + t->limit = i->limit - (i->limit % SIZE_2M); + t->next = promote; + promote = t; + + i->limit -= t->limit; + if (i->limit) { + i->base += t->limit; + } + else if (prev){ + prev->next = i->next; + i->next = *tofree; + *tofree = i; + i = prev->next; + continue; + } + else { + *list = (*list)->next; + i->next = *tofree; + *tofree = i; + i = *list; + continue; + } + } + else { + t->next = *tofree; + *tofree = t; + } + continue; + } + + prev = i; + i = i->next; } - order -= 12; + for (; promote; promote = t) { + t = promote->next; + promote->next = *m_list; + *m_list = 
promote; + } + + *m_list = mergesort_ll_inplace_ul(*m_list, sort_next, sort_value); + for (i = *m_list; i && i->next; ) { + if (i->base + i->limit == i->next->base) { + t = i->next; + i->next = t->next; + i->limit += t->limit; + + t->next = *tofree; + *tofree = t; + continue; + } - if (order > MM_MAX_ORDER) { - return (uint64_t)-1; + i = i->next; } + lock_release(lock); + lock_release(m_lock); +} + +void mm_defrag(void) { + struct frame_list_t* tofree = 0, * t; - return order; + defrag(&p_list, &p_2m_list, &p_lock, &p_2m_lock, &tofree); + defrag(&v_list, &v_2m_list, &v_lock, &v_2m_lock, &tofree); + + for (; tofree; tofree = t) { + t = tofree->next; + kfree(tofree); + } } diff --git a/kernel/core/mm_init.c b/kernel/core/mm_init.c deleted file mode 100644 index e45c5a2..0000000 --- a/kernel/core/mm_init.c +++ /dev/null @@ -1,226 +0,0 @@ -/* mm_init.c - memory manager initialization */ -/* Copyright (C) 2025-2026 Ebrahim Aleem -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. 
If not, see -*/ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#define ALLOCATION_UNIT 0x200000 -#define PAGE_SIZE 0x1000 - -// it should not take more than 4GiB to bootstrap mm -#define MAX_BOOTSTRAP 2048 - -#define INITIAL_VIRTUAL_LIMIT 0xFFFFFFFF80000000 - -#define ORDER_SIZE(order) ((uint64_t)(PAGE_SIZE * (1 << (uint64_t)order))) - -#define SIZE_GIB (1024 * 1024 * 1024) - -extern uint64_t page_frames_num; -extern struct page_frame_t* page_frames; -extern uint64_t virt_limit; -extern struct mm_order_entry_t order_entries[MM_MAX_ORDER]; -extern uint8_t _kernel_pend; - -static uint64_t used[MAX_BOOTSTRAP]; - -static void (*early_first_segment)(uint64_t* handle); -static void (*early_next_segment)(uint64_t* handle, struct mem_segment_t* seg); -static uint64_t early_skip; -static uint64_t kernel_limit; - -extern uint64_t mem_limit; - -void mm_init( - void (*first_segment)(uint64_t* handle), - void (*next_segment)(uint64_t* handle, struct mem_segment_t* seg)) { - - kernel_limit = (uint64_t)&_kernel_pend; - early_first_segment = first_segment; - early_next_segment = next_segment; - early_skip = 0; - - // permanent virtual addresses are useful for early memory/paging - virt_limit = INITIAL_VIRTUAL_LIMIT; - - logging_log_debug("Initializing paging"); - paging_init(); - logging_log_debug("Early paging done"); - - // find memory limit - mem_limit = 0; - - uint64_t handle; - struct mem_segment_t seg; - - first_segment(&handle); - for (early_next_segment(&handle, &seg); seg.size || seg.base; next_segment(&handle, &seg)) { - logging_log_debug("Memory segment 0x%lX (base) 0x%lX (size) 0x%lX (type)", - (uint64_t)seg.base, (uint64_t)seg.size, (uint64_t)seg.type); - if (seg.type == MEM_AVL && seg.base + seg.size > mem_limit) mem_limit = seg.base + seg.size; - } - - logging_log_info("Detected 0x%lX bytes (0x%lX GiB) of memory", mem_limit, (uint64_t)(mem_limit / SIZE_GIB)); - - // find number of pages - page_frames_num = 
(mem_limit + PAGE_SIZE - 1) / PAGE_SIZE; - const uint64_t frames_size = sizeof(struct page_frame_t) * page_frames_num; - - // allocate pv - page_frames = (struct page_frame_t*)mm_alloc_pv(frames_size); - - uint64_t i; - for (i = (uint64_t)page_frames; i < (uint64_t)page_frames + frames_size; i += ALLOCATION_UNIT) { - paging_early_map_2m(i, mm_early_alloc_2m(), PAGE_PRESENT | PAGE_RW); - } - - mm_init_dv(); - alloc_init(); - - // now bootstrapping mm, paging, and heap is setup - - // setup order entries and frame array - for (i = 0; i < MM_MAX_ORDER; i++) { - order_entries[i].free = 0; - order_entries[i].bitmap = early_kmalloc(page_frames_num / (uint64_t)(8 * (1 << i))); - kmemset(order_entries[i].bitmap, 0, page_frames_num / (uint64_t)(8 * ( 1 << i))); - } - - i = early_skip; - struct mm_free_buddy_t* t; - first_segment(&handle); - for (early_next_segment(&handle, &seg); seg.size || seg.base; ) { - if (seg.type == MEM_AVL) { - if (seg.base <= kernel_limit && seg.size >= PAGE_SIZE) { - seg.base += PAGE_SIZE; - seg.size -= PAGE_SIZE; - continue; - } - - if (i > 0 && seg.size >= 2 * ALLOCATION_UNIT) { - seg.size -= ALLOCATION_UNIT + (seg.base % ALLOCATION_UNIT == 0 - ? 0 : (ALLOCATION_UNIT - (seg.base % ALLOCATION_UNIT))); - seg.base += ALLOCATION_UNIT + (seg.base % ALLOCATION_UNIT == 0 - ? 
0 : (ALLOCATION_UNIT - (seg.base % ALLOCATION_UNIT))); - i--; - continue; - } - - if (seg.size >= PAGE_SIZE) { - int8_t order; - for (order = MM_MAX_ORDER - 1; order >= 0; order--) { - if (seg.size >= ORDER_SIZE(order) && seg.base % ORDER_SIZE(order) == 0) { - t = order_entries[order].free; - order_entries[order].free = early_kmalloc(sizeof(struct mm_free_buddy_t)); - order_entries[order].free->base = seg.base; - order_entries[order].free->next = t; - - seg.base += ORDER_SIZE(order); - seg.size -= ORDER_SIZE(order); - break; - } - } - - // if no hit, round to page - if (order == -1) { - seg.size -= PAGE_SIZE - (seg.base % PAGE_SIZE); - seg.base += PAGE_SIZE - (seg.base % PAGE_SIZE); - } - continue; - } - } - - next_segment(&handle, &seg); - } - - // account for lost padding - uint64_t prv_cons = 0; - uint64_t size; - for (i = 0; i < early_skip; i++) { - if (used[i] != prv_cons + ALLOCATION_UNIT && used[i] % ALLOCATION_UNIT != 0) { - size = ALLOCATION_UNIT - (used[i] % ALLOCATION_UNIT); - while (size >= PAGE_SIZE) { - int8_t order; - for (order = MM_ORDER_2M - 1; order >= 0; order--) { - if (size >= ORDER_SIZE(order) && used[i] % ORDER_SIZE(order) == 0) { - t = order_entries[order].free; - order_entries[order].free = early_kmalloc(sizeof(struct mm_free_buddy_t)); - order_entries[order].free->base = used[i]; - order_entries[order].free->next = t; - - used[i] += ORDER_SIZE(order); - size -= ORDER_SIZE(order); - break; - } - } - - // if no hit, round to page - if (order == -1) { - size -= PAGE_SIZE - (used[i] % PAGE_SIZE); - used[i] += PAGE_SIZE - (used[i] % PAGE_SIZE); - } - } - } - - prv_cons = used[i]; - } -} - -uint64_t mm_early_alloc_2m(void) { - - uint64_t handle, i = early_skip; - struct mem_segment_t seg; - - if (early_skip == MAX_BOOTSTRAP) { - panic(PANIC_NO_MEM); - } - - early_first_segment(&handle); - for (early_next_segment(&handle, &seg); seg.size || seg.base; ) { - // ensuring twice the size makes math a lot easier - if (seg.type == MEM_AVL && seg.size 
>= 2 * ALLOCATION_UNIT) { - if (seg.base <= kernel_limit) { - seg.base += ALLOCATION_UNIT; - seg.size -= ALLOCATION_UNIT; - continue; - } - - if (i--) { - seg.base += ALLOCATION_UNIT; - seg.size -= ALLOCATION_UNIT; - continue; - } - - used[early_skip++] = seg.base; - if (seg.base % ALLOCATION_UNIT == 0) return seg.base; - else return seg.base + ALLOCATION_UNIT - (seg.base % ALLOCATION_UNIT); - } - - early_next_segment(&handle, &seg); - } - - panic(PANIC_NO_MEM); -} diff --git a/kernel/core/paging.c b/kernel/core/paging.c index 83d8feb..47bf506 100644 --- a/kernel/core/paging.c +++ b/kernel/core/paging.c @@ -19,271 +19,123 @@ #include #include +#include #include -#include #include #include +#include #include #define PAGE_PS 0x80 -#define PAGE_TBL_FLG (PAGE_PRESENT | PAGE_RW) -#define PAGE_SIZE 0x1000 - -#define TABLE_PADDR_MASK 0xFFFFFFFFFFFFF000 - -#define POOL_SIZE 0x200000 - -#define GET_TABLE(entry) ((uint64_t*)((uint64_t)entry & TABLE_PADDR_MASK)) - -#define PDPT_PADDR_MASK 0xFFFFFFFFC0000000 -#define PD_PADDR_MASK 0xFFFFFFFFFFE00000 +#define PAGE_ADDR_MASK 0x0000FFFFFFFFF000 +#define PAGE_ADDR_PAT_MASK 0x0000FFFFFFFFE000 #define GET_PT_INDEX(addr) ((addr & 0x1FF000) >> 12) #define GET_PD_INDEX(addr) ((addr & 0x3FE00000) >> 21) #define GET_PDPT_INDEX(addr) ((addr & 0x7FC0000000) >> 30) #define GET_PML4_INDEX(addr) ((addr & 0xFF8000000000) >> 39) -struct page_table_t; - extern uint64_t kernel_pml4[512]; -static uint64_t bootstrap_pdpt[512] __attribute__((aligned(PAGE_SIZE))); -static uint64_t bootstrap_pd[512] __attribute__((aligned(PAGE_SIZE))); - -struct paging_pool_header_t { - uint16_t used; - struct paging_pool_header_t* next; - uint8_t hint; - uint8_t ac; - uint8_t bitmap[64]; - uint8_t resv[4020]; -} __attribute__((packed)); - -_Static_assert(sizeof(struct paging_pool_header_t) == PAGE_SIZE, "paging pool header must be 4K"); - -struct paging_pool_header_t* root_pool; - -struct paging_pool_header_t* hint; - -static struct paging_pool_header_t* 
early_create_pool(void); -static struct paging_pool_header_t* create_pool(void); +static uint8_t paging_lock; -static uint64_t early_alloc_page(void) { - if (hint->used == 512) { - // last pool is always empty - for (hint = root_pool; hint->used == 512; hint = hint->next); +static enum page_size_t page_walk(uint64_t vaddr, uint64_t** access) { + uint64_t entry; + *access = (uint64_t*)paging_ident((uint64_t)&kernel_pml4[0]); - if (hint->ac == 0) { - hint->ac = 1; - hint->next = early_create_pool(); - } + entry = (*access)[GET_PML4_INDEX(vaddr)]; + *access = &(*access)[GET_PML4_INDEX(vaddr)]; + if (!(entry & PAGE_PRESENT)) { + return _PAGE_512G; } + *access = (uint64_t*)paging_ident((**access & PAGE_ADDR_MASK)); - for (; hint->hint < 64; hint->hint++) { - if (hint->bitmap[hint->hint] != 0xFF) { - for (uint8_t i = 0; i < 8; i++) { - if (((hint->bitmap[hint->hint] & (1 << i))) != (1 << i)) { - hint->bitmap[hint->hint] |= 1 << i; - hint->used++; - const uint64_t addr = (uint64_t)hint + PAGE_SIZE * ((uint64_t)hint->hint * 8 + (uint64_t)i); - kmemset((void*)addr, 0, PAGE_SIZE); - return addr; - } - } - } + entry = (*access)[GET_PDPT_INDEX(vaddr)]; + *access = &(*access)[GET_PDPT_INDEX(vaddr)]; + if (!(entry & PAGE_PRESENT)) { + return PAGE_1G; } - hint->hint = 0; - return early_alloc_page(); -} - -static uint64_t alloc_page(void) { - if (hint->used == 512) { - // last pool is always empty - for (hint = root_pool; hint->used == 512; hint = hint->next); - - if (hint->ac == 0) { - hint->ac = 1; - hint->next = create_pool(); - } + if (entry & PAGE_PS) { + return PAGE_1G; } + *access = (uint64_t*)paging_ident((**access & PAGE_ADDR_MASK)); - for (; hint->hint < 64; hint->hint++) { - if (hint->bitmap[hint->hint] != 0xFF) { - for (uint8_t i = 0; i < 8; i++) { - if (((hint->bitmap[hint->hint] & (1 << i))) != (1 << i)) { - hint->bitmap[hint->hint] |= 1 << i; - hint->used++; - const uint64_t addr = (uint64_t)hint + PAGE_SIZE * ((uint64_t)hint->hint * 8 + (uint64_t)i); - 
kmemset((void*)addr, 0, PAGE_SIZE); - return addr; - } - } - } + entry = (*access)[GET_PD_INDEX(vaddr)]; + *access = &(*access)[GET_PD_INDEX(vaddr)]; + if (!(entry & PAGE_PRESENT)) { + return PAGE_2M; } - hint->hint = 0; - return alloc_page(); -} - -static struct paging_pool_header_t* early_create_pool(void) { - struct paging_pool_header_t* const addr = (struct paging_pool_header_t*)mm_early_alloc_2m(); - paging_early_map_2m((uint64_t)addr, (uint64_t)addr, PAGE_PRESENT | PAGE_RW); - - kmemset(addr, 0, POOL_SIZE); - addr->bitmap[0] = 0x01; - - return addr; -} - -static struct paging_pool_header_t* create_pool(void) { - struct paging_pool_header_t* const addr = (struct paging_pool_header_t*)mm_alloc(MM_ORDER_2M); - paging_map((uint64_t)addr, (uint64_t)addr, PAGE_PRESENT | PAGE_RW, PAGE_2M); - - kmemset(addr, 0, POOL_SIZE); - addr->bitmap[0] = 0x01; + if (entry & PAGE_PS) { + return PAGE_2M; + } + *access = (uint64_t*)paging_ident((**access & PAGE_ADDR_MASK)); - return addr; + *access = &(*access)[GET_PT_INDEX(vaddr)]; + return PAGE_4K; } void paging_init(void) { - const uint64_t pool_base = mm_early_alloc_2m(); - - uint64_t pdpt = (uint64_t)kernel_pml4[GET_PML4_INDEX(pool_base)]; - - if ((pdpt & PAGE_PRESENT) != PAGE_PRESENT) { - pdpt = ((uint64_t)&bootstrap_pdpt[0] - KERNEL_VMA) | PAGE_PRESENT | PAGE_RW; - kernel_pml4[GET_PML4_INDEX(pool_base)] = pdpt; - } - - - uint64_t pd = GET_TABLE(pdpt)[GET_PDPT_INDEX(pool_base)]; - - if ((pd & PAGE_PS) != PAGE_PS) { - GET_TABLE(pdpt)[GET_PDPT_INDEX(pool_base)] = ((uint64_t)&bootstrap_pd[0] - KERNEL_VMA) | - PAGE_PRESENT | PAGE_RW; - - bootstrap_pd[GET_PD_INDEX(pool_base)] = pool_base | PAGE_PRESENT | PAGE_RW | PAGE_PS; - } - - root_pool = (struct paging_pool_header_t*)pool_base; - hint = root_pool; - - kmemset(root_pool, 0, POOL_SIZE); - - root_pool->ac = 1; - root_pool->bitmap[0] = 0x01; - root_pool->next = early_create_pool(); + lock_init(&paging_lock); } -void paging_map(uint64_t vaddr, uint64_t paddr, uint8_t flg, enum 
page_size_t page_size) { - uint64_t pdpt = (uint64_t)kernel_pml4[GET_PML4_INDEX(vaddr)]; - - if ((pdpt & PAGE_PRESENT) != PAGE_PRESENT) { - pdpt = alloc_page() | PAGE_TBL_FLG; - kernel_pml4[GET_PML4_INDEX(vaddr)] = pdpt; - } - - uint64_t pd = GET_TABLE(pdpt)[GET_PDPT_INDEX(vaddr)]; - - if ((pd & PAGE_PRESENT) == PAGE_PRESENT && (pd & PAGE_PS) == PAGE_PS) { - if ((pd & TABLE_PADDR_MASK) == (paddr & PDPT_PADDR_MASK)) { - logging_log_warning("Already mapped 0x%lX -> 0x%lX", paddr, vaddr); - return; - } - - panic(PANIC_PAGING); - } - - if (page_size == PAGE_1G) { - if ((pd & PAGE_PRESENT) == PAGE_PRESENT) { - panic(PANIC_PAGING); +uint64_t paging_map(uint64_t vaddr, uint64_t paddr, uint8_t flg, enum page_size_t page_size) { + uint64_t* access; + enum page_size_t lvl = page_walk(vaddr, &access); + + if (lvl < page_size) { + logging_log_error("Cannot override page of finer granularity from 0x%lx-0x%lx (%u) to 0x%lx-0x%lx (%u)", + vaddr, *access - IDENT_BASE, (uint32_t)lvl, vaddr, paddr | flg, (uint32_t)page_size); + return *access - IDENT_BASE; + } + + if (*access & PAGE_PRESENT) { + logging_log_error("Cannot override page from 0x%lx-0x%lx (%u) to 0x%lx-0x%lx (%u)", + vaddr, *access, (uint32_t)lvl, vaddr, paddr | flg, (uint32_t)page_size); + return *access; + } + + for (; lvl > page_size; lvl--) { + *access = mm_alloc_p(0x1000) | PAGE_PRESENT | PAGE_RW; + access = (uint64_t*)paging_ident((*access & PAGE_ADDR_MASK)); + kmemset(access, 0, 0x1000); + + switch (lvl) { + case _PAGE_512G: + access = &(access)[GET_PDPT_INDEX(vaddr)]; + break; + case PAGE_1G: + access = &(access)[GET_PD_INDEX(vaddr)]; + break; + default: + access = &(access)[GET_PT_INDEX(vaddr)]; + break; } - - GET_TABLE(pdpt)[GET_PDPT_INDEX(vaddr)] = paddr | flg | PAGE_PS; - return; } - if ((pd & PAGE_PRESENT) != PAGE_PRESENT) { - pd = alloc_page() | PAGE_TBL_FLG; - GET_TABLE(pdpt)[GET_PDPT_INDEX(vaddr)] = pd; + if (page_size != PAGE_4K) { + flg |= PAGE_PS; } - uint64_t pt = GET_TABLE(pd)[GET_PD_INDEX(vaddr)]; 
- - if ((pt & PAGE_PRESENT) == PAGE_PRESENT && (pt & PAGE_PS) == PAGE_PS) { - if ((pt & TABLE_PADDR_MASK) == (paddr & PD_PADDR_MASK)) { - logging_log_warning("Already mapped 0x%lX -> 0x%lX", paddr, vaddr); - return; - } - - panic(PANIC_PAGING); - } + *access = paddr | flg; + return paddr; +} - if (page_size == PAGE_2M) { - if ((pt & PAGE_PRESENT) == PAGE_PRESENT) { - panic(PANIC_PAGING); - } +void paging_unmap(uint64_t vaddr, enum page_size_t page_size) { + uint64_t* access; + enum page_size_t lvl = page_walk(vaddr, &access); - GET_TABLE(pd)[GET_PD_INDEX(vaddr)] = paddr | flg | PAGE_PS; + if (lvl != page_size) { + logging_log_error("Cannot unmap page of different granularity"); return; } - if ((pt & PAGE_PRESENT) != PAGE_PRESENT) { - pt = alloc_page() | PAGE_TBL_FLG; - GET_TABLE(pd)[GET_PD_INDEX(vaddr)] = pt; - } - - const uint64_t addr = GET_TABLE(pt)[GET_PT_INDEX(vaddr)]; - - if ((addr & PAGE_PRESENT) == PAGE_PRESENT) { - if ((addr & TABLE_PADDR_MASK) == paddr) { - logging_log_warning("Already mapped 0x%lX -> 0x%lX", paddr, vaddr); - return; - } - - panic(PANIC_PAGING); - } - - GET_TABLE(pt)[GET_PT_INDEX(vaddr)] = paddr | flg; + *access = 0; } -void paging_early_map_2m(uint64_t vaddr, uint64_t paddr, uint8_t flg) { - uint64_t pdpt = (uint64_t)kernel_pml4[GET_PML4_INDEX(vaddr)]; - - if ((pdpt & PAGE_PRESENT) != PAGE_PRESENT) { - pdpt = early_alloc_page() | PAGE_TBL_FLG; - kernel_pml4[GET_PML4_INDEX(vaddr)] = pdpt; - } - - uint64_t pd = GET_TABLE(pdpt)[GET_PDPT_INDEX(vaddr)]; - - if ((pd & PAGE_PS) == PAGE_PS) { - if ((pd & TABLE_PADDR_MASK) == (paddr & PDPT_PADDR_MASK)) { - logging_log_warning("Already mapped 0x%lX -> 0x%lX", paddr, vaddr); - return; - } - - panic(PANIC_PAGING); - } - - if ((pd & PAGE_PRESENT) != PAGE_PRESENT) { - pd = early_alloc_page() | PAGE_TBL_FLG; - GET_TABLE(pdpt)[GET_PDPT_INDEX(vaddr)] = pd; - } - - const uint64_t pt = GET_TABLE(pd)[GET_PD_INDEX(vaddr)]; - - if ((pt & PAGE_PRESENT) == PAGE_PRESENT) { - if ((pt & TABLE_PADDR_MASK) == (paddr 
& PD_PADDR_MASK)) { - logging_log_warning("Already mapped 0x%lX -> 0x%lX", paddr, vaddr); - return; - } - - panic(PANIC_PAGING); - } - - GET_TABLE(pd)[GET_PD_INDEX(vaddr)] = paddr | flg | PAGE_PS; +uint64_t paging_ident(uint64_t paddr) { + return paddr + IDENT_BASE; } diff --git a/kernel/core/proc_data.S b/kernel/core/proc_data.S new file mode 100644 index 0000000..b29cca3 --- /dev/null +++ b/kernel/core/proc_data.S @@ -0,0 +1,43 @@ +/* proc_data.S - per processor data routines */ +/* Copyright (C) 2025-2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.
If not, see +*/ + +.section .text + +.globl proc_data_get +proc_data_get: +movq %gs:0, %rax +ret + +.globl proc_data_set_id +proc_data_set_id: +movq proc_data_ptr, %rax +movq (%rax,%rdi,8), %rax +movl $0xC0000101, %ecx +movq %rax, %rdx +shrq $32,%rdx +wrmsr +ret + +.section .data + +.globl proc_arb_id +proc_arb_id: +.byte 0 + +.globl proc_data_ptr +proc_data_ptr: +.quad 0 diff --git a/kernel/include/core/alloc.h b/kernel/include/core/alloc.h index 87a50ff..676f811 100644 --- a/kernel/include/core/alloc.h +++ b/kernel/include/core/alloc.h @@ -18,12 +18,44 @@ #ifndef KERNEL_CORE_ALLOC_H #define KERNEL_CORE_ALLOC_H +#include #include +enum slab_t { + SLAB_1, + SLAB_2, + SLAB_4, + SLAB_8, + SLAB_16, + SLAB_32, + SLAB_64, + SLAB_128, + SLAB_256, + SLAB_512, + SLAB_1K, + SLAB_2K, + SLAB_4K, + SLAB_8K, + SLAB_16K, + SLAB_32K, + SLAB_64K, + SLAB_128K, + SLAB_256K, + SLAB_512K, + SLAB_1M, + SLAB_2M, + SLAB_MAX +}; + +struct proc_alloc_caches_t { + uint8_t lock; + struct cache_node_t* avl; + struct cache_node_t* slabs[SLAB_MAX]; +}; + extern void alloc_init(void); extern void* kmalloc(size_t size); - -extern void* early_kmalloc(size_t size); +extern void kfree(void* ptr); #endif /* KERNEL_CORE_ALLOC_H */ diff --git a/kernel/include/core/lock.h b/kernel/include/core/lock.h index a43d8cc..0ba48ef 100644 --- a/kernel/include/core/lock.h +++ b/kernel/include/core/lock.h @@ -20,6 +20,7 @@ #ifndef KERNEL_INCLUDE_CORE_LOCK_H #define KERNEL_INCLUDE_CORE_LOCK_H +extern void lock_init(uint8_t* lock); extern void lock_acquire(uint8_t* lock); extern void lock_release(uint8_t* lock); diff --git a/kernel/include/core/mm.h b/kernel/include/core/mm.h index bd50c8b..16ed60a 100644 --- a/kernel/include/core/mm.h +++ b/kernel/include/core/mm.h @@ -21,43 +21,38 @@ #include #include -enum mm_order_t { - MM_ORDER_4K, - MM_ORDER_8K, - MM_ORDER_16K, - MM_ORDER_32K, - MM_ORDER_64K, - MM_ORDER_128K, - MM_ORDER_256K, - MM_ORDER_512K, - MM_ORDER_1M, - MM_ORDER_2M, - MM_MAX_ORDER -}; - -struct 
page_frame_t { - uint8_t flg; -} __attribute__((packed)); +#define CANON_HIGH 0xFFFF800000000000 +#define VIRTUAL_LIMIT IDENT_BASE +#define SIZE_2M 0x200000 -struct mm_free_buddy_t { +struct mem_segment_t { uint64_t base; - struct mm_free_buddy_t* next; + size_t size; + enum { + MEM_AVL, + MEM_CLM, + MEM_PRS + } type; }; -struct mm_order_entry_t { - struct mm_free_buddy_t* free; - uint8_t* bitmap; -}; +extern void mm_init( + void (*first_segment)(uint64_t* handle), + void (*next_segment)(uint64_t* handle, struct mem_segment_t* seg)); +extern uint64_t mm_early_alloc_2m(void); -extern uint64_t mm_alloc(enum mm_order_t order); +uint64_t mm_alloc_p(size_t size); +uint64_t mm_alloc_v(size_t size); -extern uint64_t mm_alloc_pv(size_t size); +uint64_t mm_alloc_p2m(void); +uint64_t mm_alloc_v2m(void); -extern void mm_init_dv(void); +void mm_free_p(uint64_t addr, size_t size); +void mm_free_v(uint64_t addr, size_t size); -extern uint64_t mm_alloc_dv(enum mm_order_t order); +void mm_free_p2m(uint64_t addr); +void mm_free_v2m(uint64_t addr); -extern uint64_t mm_lowest_order(size_t size); +void mm_defrag(void); #endif /* KERNEL_CORE_MM_H */ diff --git a/kernel/include/core/paging.h b/kernel/include/core/paging.h index cbada83..12c2d4c 100644 --- a/kernel/include/core/paging.h +++ b/kernel/include/core/paging.h @@ -28,18 +28,23 @@ #define PAGE_BASE_MASK 0xFFFFFFFFFFFFF000 #define PAGE_SIZE_4K 0x1000 -#define KERNEL_VMA 0xFFFFFFFF80000000 +#define PAGE_SIZE_2M 0x200000 +#define PAGE_SIZE_1G 0x40000000 +#define KERNEL_VMA 0xFFFFFFFF80000000 +#define IDENT_BASE 0xFFFFFF0000000000 enum page_size_t { PAGE_4K, PAGE_2M, - PAGE_1G + PAGE_1G, + _PAGE_512G, // reserved for internal }; extern void paging_init(void); -extern void paging_map(uint64_t vaddr, uint64_t paddr, uint8_t flg, enum page_size_t page_size); +extern uint64_t paging_map(uint64_t vaddr, uint64_t paddr, uint8_t flg, enum page_size_t page_size); +extern void paging_unmap(uint64_t vaddr, enum page_size_t page_size); 
+extern uint64_t paging_ident(uint64_t paddr); -extern void paging_early_map_2m(uint64_t vaddr, uint64_t paddr, uint8_t flg); #endif /* KERNEL_CORE_PAGING_H */ diff --git a/kernel/include/core/mm_init.h b/kernel/include/core/proc_data.h similarity index 61% rename from kernel/include/core/mm_init.h rename to kernel/include/core/proc_data.h index b5f9e23..11d99db 100644 --- a/kernel/include/core/mm_init.h +++ b/kernel/include/core/proc_data.h @@ -1,4 +1,4 @@ -/* mm_init.h - memory manager initialization interface */ +/* proc_data.h - per processor data interface */ /* Copyright (C) 2025-2026 Ebrahim Aleem * * This program is free software: you can redistribute it and/or modify @@ -15,26 +15,23 @@ * along with this program. If not, see */ -#ifndef KERNEL_CORE_MM_INIT_H -#define KERNEL_CORE_MM_INIT_H +#ifndef KERNEL_CORE_PROC_DATA_H +#define KERNEL_CORE_PROC_DATA_H #include -#include - -struct mem_segment_t { - uint64_t base; - size_t size; - enum { - MEM_AVL, - MEM_CLM, - MEM_PRS - } type; + +#include + +struct proc_data_t { + uint8_t arb_id; + struct proc_alloc_caches_t alloc_caches; }; -extern void mm_init( - void (*first_segment)(uint64_t* handle), - void (*next_segment)(uint64_t* handle, struct mem_segment_t* seg)); +extern uint8_t proc_arb_id; + +extern struct proc_data_t** proc_data_ptr; -extern uint64_t mm_early_alloc_2m(void); +extern void proc_data_set_id(uint8_t id); +extern struct proc_data_t* proc_data_get(void); -#endif /* KERNEL_CORE_MM_INIT_H */ +#endif /* KERNEL_CORE_PROC_DATA_H */ diff --git a/kernel/include/lib/mergesort.h b/kernel/include/lib/mergesort.h new file mode 100644 index 0000000..0877ffd --- /dev/null +++ b/kernel/include/lib/mergesort.h @@ -0,0 +1,23 @@ +/* mergesort.h - mergesort interface */ +/* Copyright (C) 2025-2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the
License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see +*/ + +#ifndef KERNEL_LIB_MERGESORT_H +#define KERNEL_LIB_MERGESORT_H + +extern void* mergesort_ll_inplace_ul(void* ll, void** (*next)(void*), unsigned long (*value)(void*)); + +#endif /* KERNEL_LIB_MERGESORT_H */ diff --git a/kernel/lib/mergesort.c b/kernel/lib/mergesort.c new file mode 100644 index 0000000..b46aaf9 --- /dev/null +++ b/kernel/lib/mergesort.c @@ -0,0 +1,72 @@ +/* mergesort.c - mergesort implementation */ +/* Copyright (C) 2025-2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.
If not, see +*/ + +#include + +void* mergesort_ll_inplace_ul(void* ll, void** (*next)(void*), unsigned long (*value)(void*)) { + if (!ll || !*next(ll)) { + return ll; + } + + void *mid, *right, *left; + void *fast; + void *head, *tail; + + // find mid + mid = ll; + fast = *next(ll); + + while (fast && *next(fast)) { + mid = *next(mid); + fast = *next(*next(fast)); + } + + right = *next(mid); + *next(mid) = 0; + + left = mergesort_ll_inplace_ul(ll, next, value); + right = mergesort_ll_inplace_ul(right, next, value); + + // merge + if (!left) return right; + if (!right) return left; + + if (value(left) <= value(right)) { + head = left; + left = *next(left); + } else { + head = right; + right = *next(right); + } + + tail = head; + + while (left && right) { + if (value(left) <= value(right)) { + *next(tail) = left; + left = *next(left); + } else { + *next(tail) = right; + right = *next(right); + } + tail = *next(tail); + } + + *next(tail) = left ? left : right; + + return head; +} From 9fe73dad746a87f34423449a4183dd57a99dd9ae Mon Sep 17 00:00:00 2001 From: Ebrahim Aleem Date: Tue, 10 Feb 2026 23:09:28 -0800 Subject: [PATCH 2/4] ap bootstrap --- boot/multiboot2/boot.S | 2 + boot/multiboot2/init.c | 2 +- drivers/apic/apic_init.c | 57 ++++++++ drivers/apic/ipi.c | 50 +++++++ drivers/include/apic/apic_init.h | 2 + drivers/include/apic/apic_regs.h | 4 +- .../05_mp.ld => drivers/include/apic/ipi.h | 25 ++-- drivers/serial/serial.S | 3 +- drivers/serial/serial_print.c | 2 + kernel/core/ap.S | 123 ++++++++++++++++++ kernel/core/kentry.c | 22 ++++ kernel/include/core/kentry.h | 2 + kernel/include/core/proc_data.h | 1 + 13 files changed, 277 insertions(+), 18 deletions(-) create mode 100644 drivers/apic/ipi.c rename kernel/core/05_mp.ld => drivers/include/apic/ipi.h (64%) create mode 100644 kernel/core/ap.S diff --git a/boot/multiboot2/boot.S b/boot/multiboot2/boot.S index c5be9c5..8748ea4 100644 --- a/boot/multiboot2/boot.S +++ b/boot/multiboot2/boot.S @@ -268,6 +268,8 @@ gdt: 
.quad 0 .quad 0 gdt.end: +.globl gdt_end +gdt_end: .equ CODE_SEG, gdt.codeseg - gdt .equ DATA_SEG, gdt.dataseg - gdt diff --git a/boot/multiboot2/init.c b/boot/multiboot2/init.c index 47131c8..1a36244 100644 --- a/boot/multiboot2/init.c +++ b/boot/multiboot2/init.c @@ -47,7 +47,7 @@ #define VIDEO_XRGB8888_BLUEMASK 0x08 #define VIDEO_XRGB8888_BPP 32 -static struct proc_data_t bsp_proc_data; +struct proc_data_t bsp_proc_data; static struct proc_data_t* bsp_proc_data_ptr; struct mb2_tag_memmap_entry_t { diff --git a/drivers/apic/apic_init.c b/drivers/apic/apic_init.c index 2b1570f..53e4d49 100644 --- a/drivers/apic/apic_init.c +++ b/drivers/apic/apic_init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #define APIC_BASE_MASK 0xFFFFFFFFFF000 @@ -75,11 +77,21 @@ #define LINT0_SET 0x1 #define LINT1_SET 0x2 +#define AP_ARB_BASE 0x9000 + +#define INIT_STACK_SIZE 0x4000 + uint64_t apic_base; uint8_t bsp_apic_id; static uint8_t timer_vector; +extern uint8_t gdt; +extern uint8_t gdt_end; +extern uint8_t kernel_pml4; + +static uint64_t* init_stacks; + void apic_init(void) { apic_base = msr_read(MSR_APIC_BASE) & APIC_BASE_MASK; paging_map(apic_base, apic_base, @@ -90,6 +102,7 @@ void apic_init(void) { logging_log_info("Initializing Local APIC 0x%lX", (uint64_t)apic_id); // get ACPI uid + uint8_t num_apic = 0; uint64_t handle; uint8_t acpi_uid = ACPI_UID_ALL_PROC; struct acpi_madt_ics_local_apic_t* local_apic; @@ -102,8 +115,19 @@ void apic_init(void) { logging_log_debug("ACPI UID 0x%lX -> APIC ID 0x%lX", (uint64_t)acpi_uid, (uint64_t)apic_id); break; } + num_apic++; } + init_stacks = kmalloc(sizeof(uint64_t*) * num_apic); + proc_data_ptr = kmalloc(sizeof(struct proc_data_t*) * num_apic); + proc_data_ptr[0] = &bsp_proc_data; + + for (--num_apic; num_apic; num_apic--) { + proc_data_ptr[num_apic] = kmalloc(sizeof(struct proc_data_t)); + init_stacks[num_apic] = (uint64_t)kmalloc(INIT_STACK_SIZE) + 
INIT_STACK_SIZE - 16; + } + + if (acpi_uid == ACPI_UID_ALL_PROC) { logging_log_error("No ACPI ID for APIC 0x%lX", (uint64_t)apic_id); panic(PANIC_APIC); @@ -246,3 +270,36 @@ void apic_timer_calib(uint8_t id) { uint8_t apic_get_bsp_id(void) { return bsp_apic_id; } + +void apic_init_ap(void) { + *(volatile uint8_t*)paging_ident(AP_ARB_BASE + 0x0) = 0; // arb lock + *(volatile uint8_t*)paging_ident(AP_ARB_BASE + 0x1) = 0; // arb id + *(volatile uint16_t*)paging_ident(AP_ARB_BASE + 0x2) = + (uint16_t)((uint64_t)&gdt_end - (uint64_t)&gdt - 1); // gdt32 ptr + *(volatile uint32_t*)paging_ident(AP_ARB_BASE + 0x4) = (uint32_t)(uint64_t)&gdt; + *(volatile uint16_t*)paging_ident(AP_ARB_BASE + 0x8) = + (uint16_t)((uint64_t)&gdt_end - (uint64_t)&gdt - 1); // gdt64 ptr + *(volatile uint64_t*)paging_ident(AP_ARB_BASE + 0xa) = (uint64_t)&gdt; + *(volatile uint32_t*)paging_ident(AP_ARB_BASE + 0x12) = (uint32_t)(uint64_t)&kernel_pml4; + *(volatile uint64_t*)paging_ident(AP_ARB_BASE + 0x16) = (uint64_t)init_stacks; + + uint64_t handle; + struct acpi_madt_ics_local_apic_t* local_apic; + acpi_parse_madt_ics_start(&handle); + for (acpi_parse_madt_ics((void*)&local_apic, &handle, MADT_ICS_PROCESSOR_LOCAL_APIC); + handle != 0; + acpi_parse_madt_ics((void*)&local_apic, &handle, MADT_ICS_PROCESSOR_LOCAL_APIC)) { + if (local_apic->APICID == apic_get_bsp_id()) { + continue; + } + apic_send_ipi_init_set(local_apic->APICID); + apic_wait_for_ipi(); + apic_send_ipi_init_clear(local_apic->APICID); + + time_busy_wait(10 * TIME_CONV_MS_TO_NS); + apic_send_ipi_sipi(local_apic->APICID); + + time_busy_wait(2000); + apic_send_ipi_sipi(local_apic->APICID); + } +} diff --git a/drivers/apic/ipi.c b/drivers/apic/ipi.c new file mode 100644 index 0000000..cb7ee82 --- /dev/null +++ b/drivers/apic/ipi.c @@ -0,0 +1,50 @@ +/* ipi.c - Inter Processor Interrupt functions */ +/* Copyright (C) 2025-2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of 
the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see +*/ + +#include +#include + +#include + +#define ICR_LEVEL 0x8000 +#define ICR_ASSERT 0x4000 +#define ICR_DS 0x1000 +#define ICR_LO_INIT 0x0500 +#define ICR_LO_SIPI (0x0600 | AP_ENTRY_PAGE) + +#define ICR_PID_SHFT 24 + +void apic_wait_for_ipi(void) { + while (apic_read_reg(APIC_REG_ICL) & ICR_DS) { + cpu_pause(); + } +} + +void apic_send_ipi_init_set(uint8_t apic_id) { + apic_write_reg(APIC_REG_ICH, (uint32_t)apic_id << ICR_PID_SHFT); + apic_write_reg(APIC_REG_ICL, ICR_LO_INIT | ICR_LEVEL | ICR_ASSERT ); +} + +void apic_send_ipi_init_clear(uint8_t apic_id) { + apic_write_reg(APIC_REG_ICH, (uint32_t)apic_id << ICR_PID_SHFT); + apic_write_reg(APIC_REG_ICL, ICR_LO_INIT | ICR_LEVEL); +} + +void apic_send_ipi_sipi(uint8_t apic_id) { + apic_write_reg(APIC_REG_ICH, (uint32_t)apic_id << ICR_PID_SHFT); + apic_write_reg(APIC_REG_ICL, ICR_LO_SIPI); +} diff --git a/drivers/include/apic/apic_init.h b/drivers/include/apic/apic_init.h index e3c238c..9fcee59 100644 --- a/drivers/include/apic/apic_init.h +++ b/drivers/include/apic/apic_init.h @@ -30,4 +30,6 @@ extern void apic_timer_calib(uint8_t id); extern uint8_t apic_get_bsp_id(void); +extern void apic_init_ap(void); + #endif /* DRIVERS_APIC_APIC_INIT_H */ diff --git a/drivers/include/apic/apic_regs.h b/drivers/include/apic/apic_regs.h index 6c54eb9..e5524b0 100644 --- a/drivers/include/apic/apic_regs.h +++ b/drivers/include/apic/apic_regs.h @@ -28,13 +28,15 @@ enum apic_reg_t { APIC_REG_EOI = 0x00B0, // end of interrupt 
register APIC_REG_SPR = 0x00F0, // spurious interrupt register APIC_REG_ESR = 0x0270, // error status register + APIC_REG_ICL = 0x0300, // interrupt command register lo + APIC_REG_ICH = 0x0310, // interrupt command register hi APIC_REG_TME = 0x0320, // timer LVT entry register APIC_REG_THE = 0x0330, // thermal LVT entry register APIC_REG_PRE = 0x0340, // performance LVT entry register APIC_REG_L0E = 0x0350, // LINT0 LVT entry register APIC_REG_L1E = 0x0360, // LINT1 LVT entry register APIC_REG_ERE = 0x0370, // error LVT entry register - APIC_REG_ICR = 0x0380, // timer initial count register + APIC_REG_TCR = 0x0380, // timer initial count register APIC_REG_DCR = 0x03E0, // divide configuration register APIC_REG_EFR = 0x0400, // extended feature register APIC_REG_EE0 = 0x0500, // extened lVT entry register 0 diff --git a/kernel/core/05_mp.ld b/drivers/include/apic/ipi.h similarity index 64% rename from kernel/core/05_mp.ld rename to drivers/include/apic/ipi.h index 14d8508..a854b10 100644 --- a/kernel/core/05_mp.ld +++ b/drivers/include/apic/ipi.h @@ -1,4 +1,4 @@ -/* 05_mp.ld - MP boostrap linker script */ +/* ipi.h - Inter Processor Interrupt interface */ /* Copyright (C) 2025-2026 Ebrahim Aleem * * This program is free software: you can redistribute it and/or modify @@ -15,21 +15,16 @@ * along with this program. If not, see */ -PHDRS { - mptext PT_LOAD FLAGS(1 | 4); /* XR */ - mpdata PT_LOAD FLAGS(2 | 4); /* WR */ -} +#ifndef DRIVERS_APIC_IPI_H +#define DRIVERS_APIC_IPI_H -SECTIONS { - . = 0x1000; +#include - .text.mpboot ALIGN(4K) : { - *(.text.mpboot) - } :mptext +#define AP_ENTRY_PAGE 8 - .data.mpboot ALIGN(4K) : { - *(.data.mpboot) - } :mpdata +extern void apic_wait_for_ipi(void); +extern void apic_send_ipi_init_set(uint8_t apic_id); +extern void apic_send_ipi_init_clear(uint8_t apic_id); +extern void apic_send_ipi_sipi(uint8_t apic_id); - ASSERT(.
<= 0x9fc00, "Kernel has overlap with EBDA") -} +#endif /* DRIVERS_APIC_IPI_H */ diff --git a/drivers/serial/serial.S b/drivers/serial/serial.S index 7f22f96..cc8b1e2 100644 --- a/drivers/serial/serial.S +++ b/drivers/serial/serial.S @@ -45,6 +45,8 @@ //TODO: test ports via loopback +.section .text + .globl serial_init_com1 serial_init_com1: mov $COM1, %rdi @@ -103,4 +105,3 @@ mov $(COM2 + TRSM), %dx movb %dil, %al out %al, %dx ret - diff --git a/drivers/serial/serial_print.c b/drivers/serial/serial_print.c index d6a7979..7542256 100644 --- a/drivers/serial/serial_print.c +++ b/drivers/serial/serial_print.c @@ -21,6 +21,8 @@ #include #include +#include + #define FLG_LL 0x1 #define FLG_PD 0x2 #define FLG_LM 0x4 diff --git a/kernel/core/ap.S b/kernel/core/ap.S new file mode 100644 index 0000000..94ccdfb --- /dev/null +++ b/kernel/core/ap.S @@ -0,0 +1,123 @@ +/* ap.S - AP bootstrap routine */ +/* Copyright (C) 2025-2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see +*/ + +#define AP_LOCK 0x9000 +#define AP_ID 0x9001 +#define AP_STACK 0x9100 +#define AP_GDT32 0x9002 +#define AP_GDT64 0x9008 +#define AP_PML4 0x9012 +#define AP_STACKS 0x9016 + +#define CMOS_RAM_INDEX 0x70 +#define CMOS_RAM_DATA 0x71 +#define NMI_DISABLE 0x80 + +.section .data + +.globl ap_bootstrap_start +ap_bootstrap_start: + + +.code16 + +// set known state +ljmp $0, $0x8005 +xorw %ax, %ax +movw %ax, %ds +movw %ax, %ss +movw %ax, %es +movw %ax, %fs +movw %ax, %gs +movl $AP_STACK, %esp // identical stacks so concurrency is not a problem +pushw %ax +popf + +// mask nmis +inb $CMOS_RAM_INDEX, %al +or $NMI_DISABLE, %al +outb %al, $CMOS_RAM_INDEX +out %al, $0x80 +inb $CMOS_RAM_DATA, %al + +// assign arbitration id +movb $1, %al +.wait_lock: +xchgb %al, AP_LOCK +test %al, %al +jnz .wait_lock +movb AP_ID, %bl +addb $1, %bl +movb %bl, AP_ID +xorb %al, %al +xchgb %al, AP_LOCK + +// enable paging +movl %cr4, %eax +orl $0x30, %eax +movl %eax, %cr4 + +movl AP_PML4, %edi +movl %edi, %cr3 + +// enter compat +movl $0xC0000080, %ecx +rdmsr +orl $0x100, %eax +wrmsr + +movl %cr0, %ecx +orl $0x80000001, %ecx +movl %ecx, %cr0 + +lgdt AP_GDT32 + +pushw $0 +pushw $8 +pushw $0 +pushw $0x807d +lretl + +// 0x2083 + +.code64 + +movw $0x10, %ax +movw %ax, %ds +movw %ax, %es +movw %ax, %fs +movw %ax, %gs +movw %ax, %ss + +lgdt AP_GDT64 + +xorq %rdi, %rdi +movb %bl, %dil + +movq AP_STACKS, %rax +movq (%rax,%rdi,8), %rsp + +pushq $0 +popfq + +pushq $0x8 +pushq $kapentry +lretq + + +.globl ap_bootstrap_end +ap_bootstrap_end: diff --git a/kernel/core/kentry.c b/kernel/core/kentry.c index b7087dc..5a06db2 100644 --- a/kernel/core/kentry.c +++ b/kernel/core/kentry.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -26,14 +27,20 @@ #include #include +#include + #include #include #include #include +#include #include struct boot_context_t boot_context; +extern uint8_t ap_bootstrap_start; +extern uint8_t ap_bootstrap_end; + void kentry(void) { 
logging_log_debug("Kernel Entry"); logging_log_debug("TSS and IDT init"); @@ -64,5 +71,20 @@ void kentry(void) { logging_log_info("Boot Complete ModulOS"); + logging_log_info("Begining AP bootstrap sequence"); + kmemcpy( + (void*)paging_ident(AP_ENTRY_PAGE * PAGE_SIZE_4K), + &ap_bootstrap_start, + (uint64_t)&ap_bootstrap_end - (uint64_t)&ap_bootstrap_start); + + apic_init_ap(); + + logging_log_info("AP bootstrap sequence done"); + + cpu_halt_loop(); +} + +void kapentry(uint64_t arb_id) { + logging_log_debug("AP %lx bootstrap complete", arb_id); cpu_halt_loop(); } diff --git a/kernel/include/core/kentry.h b/kernel/include/core/kentry.h index 0d513e8..0f6f1c1 100644 --- a/kernel/include/core/kentry.h +++ b/kernel/include/core/kentry.h @@ -41,4 +41,6 @@ extern struct boot_context_t boot_context; extern void kentry(void) __attribute__((noreturn)); +extern void kapentry(uint64_t arb_id) __attribute__((noreturn)); + #endif /* KERNEL_CORE_KENTRY_H */ diff --git a/kernel/include/core/proc_data.h b/kernel/include/core/proc_data.h index 11d99db..d534b1f 100644 --- a/kernel/include/core/proc_data.h +++ b/kernel/include/core/proc_data.h @@ -29,6 +29,7 @@ struct proc_data_t { extern uint8_t proc_arb_id; +extern struct proc_data_t bsp_proc_data; extern struct proc_data_t** proc_data_ptr; extern void proc_data_set_id(uint8_t id); From 5b9a2386b5490ff6d812fdee3dbe85787b779cea Mon Sep 17 00:00:00 2001 From: Ebrahim Aleem Date: Wed, 11 Feb 2026 10:28:31 -0800 Subject: [PATCH 3/4] ap init --- boot/multiboot2/init.c | 3 +- drivers/acpica_osl/osl.c | 2 +- drivers/apic/apic_init.c | 45 +++++++++++++++------------ drivers/hpet/hpet_init.c | 12 ++++++- drivers/include/apic/apic_init.h | 4 ++- drivers/include/serial/serial_print.h | 2 ++ drivers/serial/serial_print.c | 8 +++++ kernel/core/clock_src.c | 7 +++++ kernel/core/idt.c | 4 +++ kernel/core/kentry.c | 24 +++++++++++++- kernel/core/proc_data.S | 4 --- kernel/include/core/idt.h | 1 + kernel/include/core/proc_data.h | 2 -- 13 files 
changed, 87 insertions(+), 31 deletions(-) diff --git a/boot/multiboot2/init.c b/boot/multiboot2/init.c index 1a36244..b9fae84 100644 --- a/boot/multiboot2/init.c +++ b/boot/multiboot2/init.c @@ -141,12 +141,13 @@ void multiboot2_init(struct mb2_info_t* info) { bsp_proc_data_ptr = &bsp_proc_data; proc_data_ptr = &bsp_proc_data_ptr; proc_data_set_id(0); - proc_data_get()->arb_id = ++proc_arb_id; + proc_data_get()->arb_id = 0; logging_init(); #ifdef SERIAL serial_init_com1(); serial_init_com2(); + serial_print_init(); serial_print_com1("COM1\r\n"); serial_print_com2("COM2\r\n"); diff --git a/drivers/acpica_osl/osl.c b/drivers/acpica_osl/osl.c index 3c35290..dfae399 100644 --- a/drivers/acpica_osl/osl.c +++ b/drivers/acpica_osl/osl.c @@ -43,7 +43,7 @@ ACPI_STATUS AcpiOsInitialize(void) { } ACPI_THREAD_ID AcpiOsGetThreadId(void) { - return proc_data_get()->arb_id; + return proc_data_get()->arb_id + 1; //bsp is zero, acpica wants non zero } void ACPI_INTERNAL_XFACE AcpiOsPrintf(const char* fmt, ...) 
{ diff --git a/drivers/apic/apic_init.c b/drivers/apic/apic_init.c index 53e4d49..ea00fc1 100644 --- a/drivers/apic/apic_init.c +++ b/drivers/apic/apic_init.c @@ -83,8 +83,10 @@ uint64_t apic_base; uint8_t bsp_apic_id; +static uint8_t num_apic; static uint8_t timer_vector; +static uint8_t error_vector; extern uint8_t gdt; extern uint8_t gdt_end; @@ -97,12 +99,33 @@ void apic_init(void) { paging_map(apic_base, apic_base, PAGE_PRESENT | PAGE_RW | PAT_MMIO_4K, PAGE_4K); + timer_vector = idt_get_vector(); + error_vector = idt_get_vector(); + + // install idts + idt_install(timer_vector, (uint64_t)apic_isr_timer, GDT_CODE_SEL, 0, IDT_GATE_INT, 0); + idt_install(error_vector, (uint64_t)apic_isr_error, GDT_CODE_SEL, 0, IDT_GATE_INT, 0); + + apic_init_ap(); + + // init stacks + init_stacks = kmalloc(sizeof(uint64_t*) * num_apic); + proc_data_ptr = kmalloc(sizeof(struct proc_data_t*) * num_apic); + proc_data_ptr[0] = &bsp_proc_data; + + for (--num_apic; num_apic; num_apic--) { + proc_data_ptr[num_apic] = kmalloc(sizeof(struct proc_data_t)); + init_stacks[num_apic] = (uint64_t)kmalloc(INIT_STACK_SIZE) + INIT_STACK_SIZE - 16; + } +} + +void apic_init_ap(void) { const uint8_t apic_id = (uint8_t)(apic_read_reg(APIC_REG_IDR) >> APIC_ID_SHFT); bsp_apic_id = apic_id; logging_log_info("Initializing Local APIC 0x%lX", (uint64_t)apic_id); // get ACPI uid - uint8_t num_apic = 0; + num_apic = 0; uint64_t handle; uint8_t acpi_uid = ACPI_UID_ALL_PROC; struct acpi_madt_ics_local_apic_t* local_apic; @@ -113,21 +136,10 @@ void apic_init(void) { if (local_apic->APICID == apic_id) { acpi_uid = local_apic->ACPIProcessorUID; logging_log_debug("ACPI UID 0x%lX -> APIC ID 0x%lX", (uint64_t)acpi_uid, (uint64_t)apic_id); - break; } num_apic++; } - init_stacks = kmalloc(sizeof(uint64_t*) * num_apic); - proc_data_ptr = kmalloc(sizeof(struct proc_data_t*) * num_apic); - proc_data_ptr[0] = &bsp_proc_data; - - for (--num_apic; num_apic; num_apic--) { - proc_data_ptr[num_apic] = kmalloc(sizeof(struct 
proc_data_t)); - init_stacks[num_apic] = (uint64_t)kmalloc(INIT_STACK_SIZE) + INIT_STACK_SIZE - 16; - } - - if (acpi_uid == ACPI_UID_ALL_PROC) { logging_log_error("No ACPI ID for APIC 0x%lX", (uint64_t)apic_id); panic(PANIC_APIC); @@ -153,9 +165,6 @@ void apic_init(void) { } } - timer_vector = idt_get_vector(); - const uint8_t error_vector = idt_get_vector(); - // set NMI uint8_t lint_state = 0; struct acpi_madt_ics_local_apic_nmi_t* local_nmi; @@ -208,10 +217,6 @@ void apic_init(void) { apic_write_lve(APIC_REG_ERE, error_vector, APIC_LVT_MT_FIXED | APIC_LVT_TRG_EDGE, 0); - // install idts, init timer first - idt_install(timer_vector, (uint64_t)apic_isr_timer, GDT_CODE_SEL, 0, IDT_GATE_INT, 0); - idt_install(error_vector, (uint64_t)apic_isr_error, GDT_CODE_SEL, 0, IDT_GATE_INT, 0); - // enable apic apic_write_reg(APIC_REG_ESR, 0); apic_write_reg(APIC_REG_SPR, PIC_SPURIOUS_VEC | APIC_ASE); // pic and apic spurious both only iret, so reuse @@ -271,7 +276,7 @@ uint8_t apic_get_bsp_id(void) { return bsp_apic_id; } -void apic_init_ap(void) { +void apic_start_ap(void) { *(volatile uint8_t*)paging_ident(AP_ARB_BASE + 0x0) = 0; // arb lock *(volatile uint8_t*)paging_ident(AP_ARB_BASE + 0x1) = 0; // arb id *(volatile uint16_t*)paging_ident(AP_ARB_BASE + 0x2) = diff --git a/drivers/hpet/hpet_init.c b/drivers/hpet/hpet_init.c index 71425f1..8e435da 100644 --- a/drivers/hpet/hpet_init.c +++ b/drivers/hpet/hpet_init.c @@ -24,6 +24,7 @@ #include #include #include +#include #define HPET_GROUP_ENA 0x1 #define HPET_INT_ENA 0x4 @@ -53,21 +54,30 @@ static volatile struct hpet_group_t { struct hpet_timer_t timers[32]; } __attribute__((packed))* hpet_reg_bases[8]; +static uint8_t hpet_lock; + static uint64_t hpet_get_counter(void* meta) { volatile struct hpet_group_t* hpet = meta; - return hpet->counter; + lock_acquire(&hpet_lock); + const uint64_t ret = hpet->counter; + lock_release(&hpet_lock); + return ret; } static void hpet_set_counter(void* meta, uint64_t counter) { volatile 
struct hpet_group_t* hpet = meta; + lock_acquire(&hpet_lock); hpet->gen_conf &= ~(uint64_t)HPET_GROUP_ENA; hpet->counter = counter; hpet->gen_conf |= HPET_GROUP_ENA; + lock_release(&hpet_lock); } void hpet_init(void) { acpi_get_hpet_bases((uint64_t*)&hpet_reg_bases[0]); + lock_init(&hpet_lock); + struct clock_src_t* clock; int8_t num_tim; diff --git a/drivers/include/apic/apic_init.h b/drivers/include/apic/apic_init.h index 9fcee59..ad77bdd 100644 --- a/drivers/include/apic/apic_init.h +++ b/drivers/include/apic/apic_init.h @@ -22,6 +22,8 @@ extern void apic_init(void); +extern void apic_init_ap(void); + extern void apic_nmi_enab(void); extern void apic_disab(void); @@ -30,6 +32,6 @@ extern void apic_timer_calib(uint8_t id); extern uint8_t apic_get_bsp_id(void); -extern void apic_init_ap(void); +extern void apic_start_ap(void); #endif /* DRIVERS_APIC_APIC_INIT_H */ diff --git a/drivers/include/serial/serial_print.h b/drivers/include/serial/serial_print.h index 1ff3dbf..387f9d9 100644 --- a/drivers/include/serial/serial_print.h +++ b/drivers/include/serial/serial_print.h @@ -30,4 +30,6 @@ extern void serial_printf_com2(const char* s, ...); extern void serial_log(enum log_severity_t severity, const char* s, va_list args); +extern void serial_print_init(void); + #endif /* DRIVERS_SERIAL_SERIAL_PRINT */ diff --git a/drivers/serial/serial_print.c b/drivers/serial/serial_print.c index 7542256..446e677 100644 --- a/drivers/serial/serial_print.c +++ b/drivers/serial/serial_print.c @@ -38,6 +38,8 @@ struct num_printer_t { void (*printer)(uint8_t); }; +static uint8_t serial_lock; + static void serial_print(const char* s, void (*printer)(uint8_t)); #define _PRINT_UINT(s) \ @@ -211,6 +213,7 @@ void serial_printf_com2(const char* s, ...) 
{ } void serial_log(enum log_severity_t severity, const char* s, va_list args) { + lock_acquire(&serial_lock); switch (severity) { case SEVERITY_DBG: serial_print("DEBUG: ", serial_com12); @@ -236,4 +239,9 @@ void serial_log(enum log_severity_t severity, const char* s, va_list args) { serial_printf(s, serial_com12, args); break; } + lock_release(&serial_lock); +} + +void serial_print_init(void) { + lock_init(&serial_lock); } diff --git a/kernel/core/clock_src.c b/kernel/core/clock_src.c index 7d72d69..34c4a54 100644 --- a/kernel/core/clock_src.c +++ b/kernel/core/clock_src.c @@ -17,6 +17,7 @@ #include #include +#include #include @@ -26,10 +27,12 @@ struct clock_src_list_t { }; static struct clock_src_list_t* clocks; +static uint8_t clock_lock; void clock_src_init(void) { clocks = 0; + lock_init(&clock_lock); #ifdef HPET hpet_init(); #endif /* HPET */ @@ -37,19 +40,23 @@ void clock_src_init(void) { void clock_src_register(struct clock_src_t* clock) { struct clock_src_list_t* node = kmalloc(sizeof(struct clock_src_list_t)); + lock_acquire(&clock_lock); node->clock = clock; node->next = clocks; clocks = node; + lock_release(&clock_lock); } struct clock_src_t* clock_src_alloc(void) { struct clock_src_t* source = 0; + lock_acquire(&clock_lock); if (clocks) { source = clocks->clock; clocks = clocks->next; kfree(clocks); } + lock_release(&clock_lock); return source; } diff --git a/kernel/core/idt.c b/kernel/core/idt.c index be6b002..92ef1fe 100644 --- a/kernel/core/idt.c +++ b/kernel/core/idt.c @@ -77,6 +77,10 @@ void idt_init(void) { next_vector = 0x30; // leave 0x20 and 0x28 for legacy PIC IRQs + idt_init_ap(); +} + +void idt_init_ap(void) { cpu_lidt((uint64_t)&idt_ptr); } diff --git a/kernel/core/kentry.c b/kernel/core/kentry.c index 5a06db2..55931c9 100644 --- a/kernel/core/kentry.c +++ b/kernel/core/kentry.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -77,7 +78,7 @@ void kentry(void) { &ap_bootstrap_start, (uint64_t)&ap_bootstrap_end - 
(uint64_t)&ap_bootstrap_start); - apic_init_ap(); + apic_start_ap(); logging_log_info("AP bootstrap sequence done"); @@ -86,5 +87,26 @@ void kentry(void) { void kapentry(uint64_t arb_id) { logging_log_debug("AP %lx bootstrap complete", arb_id); + + proc_data_set_id((uint8_t)arb_id); + proc_data_get()->arb_id = (uint8_t)arb_id; + + alloc_init(); + + logging_log_debug("AP TSS and IDT init"); + tss_init(); + idt_init_ap(); + logging_log_debug("AP TSS and IDT init done"); + + logging_log_debug("AP APIC init"); + pic_disab(); + apic_init_ap(); + apic_timer_calib(apic_get_bsp_id()); + apic_nmi_enab(); + ioapic_init(); + logging_log_debug("AP APIC init done"); + + logging_log_info("AP init complete"); + cpu_halt_loop(); } diff --git a/kernel/core/proc_data.S b/kernel/core/proc_data.S index b29cca3..0fb3c27 100644 --- a/kernel/core/proc_data.S +++ b/kernel/core/proc_data.S @@ -34,10 +34,6 @@ ret .section .data -.globl proc_arb_id -proc_arb_id: -.byte 0 - .globl proc_data_ptr proc_data_ptr: .quad 0 diff --git a/kernel/include/core/idt.h b/kernel/include/core/idt.h index 7090477..88c75d1 100644 --- a/kernel/include/core/idt.h +++ b/kernel/include/core/idt.h @@ -36,6 +36,7 @@ struct idt_entry_t { } __attribute__((packed)); extern void idt_init(void); +extern void idt_init_ap(void); extern void idt_install( uint8_t v, diff --git a/kernel/include/core/proc_data.h b/kernel/include/core/proc_data.h index d534b1f..01b82ae 100644 --- a/kernel/include/core/proc_data.h +++ b/kernel/include/core/proc_data.h @@ -27,8 +27,6 @@ struct proc_data_t { struct proc_alloc_caches_t alloc_caches; }; -extern uint8_t proc_arb_id; - extern struct proc_data_t bsp_proc_data; extern struct proc_data_t** proc_data_ptr; From ee7b08fbb031e940d4ad07bc16caa32ade14ee16 Mon Sep 17 00:00:00 2001 From: Ebrahim Aleem Date: Sun, 15 Feb 2026 14:13:13 -0800 Subject: [PATCH 4/4] preemptive scheduler --- boot/multiboot2/boot.S | 35 +++--- drivers/acpica_osl/osl.c | 4 +- drivers/apic/apic_init.c | 33 +++++- 
drivers/apic/isr.S | 28 ++--- drivers/apic/isr_dispatch.c | 4 - drivers/hpet/hpet_init.c | 9 -- drivers/include/apic/isr.h | 2 +- drivers/include/apic/isr_dispatch.h | 2 - kernel/core/ap.S | 16 ++- kernel/core/cpu_instr.S | 5 + kernel/core/exception_dispatch.c | 8 +- kernel/core/exceptions.S | 1 - kernel/core/kentry.c | 18 ++- kernel/core/panic.c | 5 +- kernel/core/proc_data.S | 2 +- kernel/core/process.c | 144 +++++++++++++++++++++++ kernel/core/process_control.S | 48 ++++++++ kernel/core/scheduler.c | 88 ++++++++++++++ kernel/core/tss.c | 68 +++-------- kernel/include/core/cpu_instr.h | 2 + kernel/include/core/exception_dispatch.h | 5 +- kernel/include/core/gdt.h | 14 ++- kernel/include/core/kentry.h | 2 + kernel/include/core/proc_data.h | 4 + kernel/include/core/process.h | 105 +++++++++++++++++ kernel/include/core/scheduler.h | 33 ++++++ kernel/include/core/tss.h | 35 +++++- 27 files changed, 591 insertions(+), 129 deletions(-) create mode 100644 kernel/core/process.c create mode 100644 kernel/core/process_control.S create mode 100644 kernel/core/scheduler.c create mode 100644 kernel/include/core/process.h create mode 100644 kernel/include/core/scheduler.h diff --git a/boot/multiboot2/boot.S b/boot/multiboot2/boot.S index 8748ea4..31f0fb1 100644 --- a/boot/multiboot2/boot.S +++ b/boot/multiboot2/boot.S @@ -56,17 +56,11 @@ #define LME 0x100 #define PG (1 << 31) -#define GDT_RW (1 << 1) -#define GDT_E (1 << 3) -#define GDT_S (1 << 4) -#define GDT_P (1 << 7) -#define GDT_L (1 << 5) -#define GDT_G (1 << 7) - #define KERNEL_PAGE_FLAGS (1 + 2) #define PAGE_SZ (1 << 7) #define PAGING_1GIB 0x40000000 +#define IDENT_BASE 0xFFFFFF0000000000 .code32 .section .text.multiboot2 @@ -230,7 +224,8 @@ enter64: .section .text ktransfer: - addq $KERNEL_VMA, gdt_ptr64.addr + movq $IDENT_BASE, %rax + addq %rax, gdt_ptr64.addr lgdt gdt_ptr64 addq $KERNEL_VMA, %rsp pushq $0 @@ -250,20 +245,30 @@ gdt: .short 0x0000 /* limit 0:15 */ .short 0x0000 /* base 0:15 */ .byte 0x00 /* base 16:23 
*/ - .byte GDT_RW + GDT_E + GDT_S + GDT_P /* access: RW E S P */ - .byte GDT_L + GDT_G /* limit 16:19 flag: L G */ + .byte 0x98 /* access: DPL=0 P */ + .byte 0x20 /* limit 16:19 flag: L */ .byte 0x00 /* base 24:31 */ gdt.dataseg: .short 0x0000 /* limit 0:15 */ .short 0x0000 /* base 0:15 */ .byte 0x00 /* base 16:23 */ - .byte GDT_RW + GDT_S + GDT_P /* access: RW S P */ - .byte GDT_G /* limit 16:19 flag: G */ - .byte 0x92 /* access: RW S P */ + .byte 0x92 /* access: P */ + .byte 0x00 /* limit 16:19 flag: */ + .byte 0x00 /* base 24:31 */ gdt.ucodeseg: - .quad 0 + .short 0x0000 /* limit 0:15 */ + .short 0x0000 /* base 0:15 */ + .byte 0x00 /* base 16:23 */ + .byte 0xF8 /* access: DPL=3 P */ + .byte 0x20 /* limit 16:19 flag: L */ + .byte 0x00 /* base 24:31 */ gdt.udataseg: - .quad 0 + .short 0x0000 /* limit 0:15 */ + .short 0x0000 /* base 0:15 */ + .byte 0x00 /* base 16:23 */ + .byte 0xF2 /* access: DPL=3 P (writable data; DPL matches gdt.ucodeseg) */ + .byte 0x00 /* limit 16:19 flag: */ + .byte 0x00 /* base 24:31 */ gdt.tss: .quad 0 .quad 0 diff --git a/drivers/acpica_osl/osl.c b/drivers/acpica_osl/osl.c index dfae399..fdc5147 100644 --- a/drivers/acpica_osl/osl.c +++ b/drivers/acpica_osl/osl.c @@ -35,15 +35,15 @@ #include #include #include -#include #include +#include ACPI_STATUS AcpiOsInitialize(void) { return AE_OK; } ACPI_THREAD_ID AcpiOsGetThreadId(void) { - return proc_data_get()->arb_id + 1; //bsp is zero, acpica wants non zero + return process_get_pid(); } void ACPI_INTERNAL_XFACE AcpiOsPrintf(const char* fmt, ...) 
{ diff --git a/drivers/apic/apic_init.c b/drivers/apic/apic_init.c index ea00fc1..d8b7902 100644 --- a/drivers/apic/apic_init.c +++ b/drivers/apic/apic_init.c @@ -33,7 +33,12 @@ #include #include #include +#include #include +#include +#include + +#include #define APIC_BASE_MASK 0xFFFFFFFFFF000 @@ -60,7 +65,7 @@ #define APIC_CAL_ITR_MS 50 #define APIC_CAL_BATCH 20 -#define APIC_CAL_TOL 5000 +#define APIC_CAL_TOL 8000 #define APIC_CLOCK_MS 50 @@ -79,8 +84,6 @@ #define AP_ARB_BASE 0x9000 -#define INIT_STACK_SIZE 0x4000 - uint64_t apic_base; uint8_t bsp_apic_id; static uint8_t num_apic; @@ -92,7 +95,10 @@ extern uint8_t gdt; extern uint8_t gdt_end; extern uint8_t kernel_pml4; -static uint64_t* init_stacks; +uint64_t* init_stacks; +volatile struct gdt_t(** ap_gdts)[GDT_NUM_ENTRIES]; +volatile struct gdt_ptr_64_t** ap_gdt_ptr_64; +uint8_t* ap_init_locks; void apic_init(void) { apic_base = msr_read(MSR_APIC_BASE) & APIC_BASE_MASK; @@ -103,7 +109,7 @@ void apic_init(void) { error_vector = idt_get_vector(); // install idts - idt_install(timer_vector, (uint64_t)apic_isr_timer, GDT_CODE_SEL, 0, IDT_GATE_INT, 0); + idt_install(timer_vector, (uint64_t)apic_isr_timer, GDT_CODE_SEL, IST_SCHED, IDT_GATE_INT, 0); idt_install(error_vector, (uint64_t)apic_isr_error, GDT_CODE_SEL, 0, IDT_GATE_INT, 0); apic_init_ap(); @@ -113,9 +119,24 @@ void apic_init(void) { proc_data_ptr = kmalloc(sizeof(struct proc_data_t*) * num_apic); proc_data_ptr[0] = &bsp_proc_data; + // init gdts + ap_gdts = kmalloc(sizeof(struct gdt_t(*)[GDT_NUM_ENTRIES]) * num_apic); + ap_gdt_ptr_64 = kmalloc(sizeof(struct gdt_ptr_64_t*) * num_apic); + ap_init_locks = kmalloc(sizeof(uint8_t) * num_apic); + for (--num_apic; num_apic; num_apic--) { proc_data_ptr[num_apic] = kmalloc(sizeof(struct proc_data_t)); - init_stacks[num_apic] = (uint64_t)kmalloc(INIT_STACK_SIZE) + INIT_STACK_SIZE - 16; + init_stacks[num_apic] = (uint64_t)kmalloc(INIT_STACK_SIZE) + INIT_STACK_SIZE; + + ap_gdts[num_apic] = kmalloc(sizeof(struct 
gdt_t[GDT_NUM_ENTRIES])); + ap_gdt_ptr_64[num_apic] = kmalloc(sizeof(struct gdt_ptr_64_t)); + + kmemcpy((void*)ap_gdts[num_apic], &gdt, (uint64_t)&gdt_end - (uint64_t)&gdt); + ap_gdt_ptr_64[num_apic]->addr = (uint64_t)ap_gdts[num_apic]; + ap_gdt_ptr_64[num_apic]->limit = (uint16_t)((uint64_t)&gdt_end - (uint64_t)&gdt - 1); + + lock_init(&ap_init_locks[num_apic]); + lock_acquire(&ap_init_locks[num_apic]); } } diff --git a/drivers/apic/isr.S b/drivers/apic/isr.S index 890a4d4..f13430e 100644 --- a/drivers/apic/isr.S +++ b/drivers/apic/isr.S @@ -18,6 +18,7 @@ .globl apic_isr_timer apic_isr_timer: pushq %rax +pushq %rbx pushq %rcx pushq %rdx pushq %rsi @@ -26,28 +27,13 @@ pushq %r8 pushq %r9 pushq %r10 pushq %r11 +pushq %r12 +pushq %r13 +pushq %r14 +pushq %r15 pushq %rbp - -movq %rsp, %rbp -movq %rsp, %rax -andq $0xF, %rax -jz .aligned_timer -subq %rax, %rsp -.aligned_timer: -call apic_timer_dispatch -movq %rbp, %rsp - -popq %rbp -popq %r11 -popq %r10 -popq %r9 -popq %r8 -popq %rdi -popq %rsi -popq %rdx -popq %rcx -popq %rax -iretq +movq %rsp, %rdi +jmp process_preempt_entry .globl apic_isr_error apic_isr_error: diff --git a/drivers/apic/isr_dispatch.c b/drivers/apic/isr_dispatch.c index 9e64e6f..eb2d63c 100644 --- a/drivers/apic/isr_dispatch.c +++ b/drivers/apic/isr_dispatch.c @@ -30,10 +30,6 @@ #define ESR_RAE 0x08 #define ESR_SAE 0x04 -void apic_timer_dispatch(void) { - apic_write_reg(APIC_REG_EOI, APIC_EOI); -} - void apic_error_dispatch(void) { const char* ira = ""; const char* riv = ""; diff --git a/drivers/hpet/hpet_init.c b/drivers/hpet/hpet_init.c index 8e435da..ba25577 100644 --- a/drivers/hpet/hpet_init.c +++ b/drivers/hpet/hpet_init.c @@ -24,7 +24,6 @@ #include #include #include -#include #define HPET_GROUP_ENA 0x1 #define HPET_INT_ENA 0x4 @@ -54,30 +53,22 @@ static volatile struct hpet_group_t { struct hpet_timer_t timers[32]; } __attribute__((packed))* hpet_reg_bases[8]; -static uint8_t hpet_lock; - static uint64_t hpet_get_counter(void* meta) { volatile 
struct hpet_group_t* hpet = meta; - lock_acquire(&hpet_lock); const uint64_t ret = hpet->counter; - lock_release(&hpet_lock); return ret; } static void hpet_set_counter(void* meta, uint64_t counter) { volatile struct hpet_group_t* hpet = meta; - lock_acquire(&hpet_lock); hpet->gen_conf &= ~(uint64_t)HPET_GROUP_ENA; hpet->counter = counter; hpet->gen_conf |= HPET_GROUP_ENA; - lock_release(&hpet_lock); } void hpet_init(void) { acpi_get_hpet_bases((uint64_t*)&hpet_reg_bases[0]); - lock_init(&hpet_lock); - struct clock_src_t* clock; int8_t num_tim; diff --git a/drivers/include/apic/isr.h b/drivers/include/apic/isr.h index f92e06f..9c20c76 100644 --- a/drivers/include/apic/isr.h +++ b/drivers/include/apic/isr.h @@ -18,7 +18,7 @@ #ifndef DRIVERS_APIC_ISR_H #define DRIVERS_APIC_ISR_H -extern void apic_isr_timer(void); +extern void apic_isr_timer(void) __attribute__((noreturn)); extern void apic_isr_error(void); diff --git a/drivers/include/apic/isr_dispatch.h b/drivers/include/apic/isr_dispatch.h index ac96e65..a4e4ae2 100644 --- a/drivers/include/apic/isr_dispatch.h +++ b/drivers/include/apic/isr_dispatch.h @@ -18,8 +18,6 @@ #ifndef DRIVERS_APIC_ISR_DISPATCH_H #define DRIVERS_APIC_ISR_DISPATCH_H -extern void apic_timer_dispatch(void); - extern void apic_error_dispatch(void); #endif /* DIRVERS_APIC_ISR_DISPATCH_H */ diff --git a/kernel/core/ap.S b/kernel/core/ap.S index 94ccdfb..b1b8ea5 100644 --- a/kernel/core/ap.S +++ b/kernel/core/ap.S @@ -84,6 +84,7 @@ movl %cr0, %ecx orl $0x80000001, %ecx movl %ecx, %cr0 +// boostrap using bsp gdt lgdt AP_GDT32 pushw $0 @@ -92,7 +93,7 @@ pushw $0 pushw $0x807d lretl -// 0x2083 +// 0x807d .code64 @@ -111,6 +112,18 @@ movb %bl, %dil movq AP_STACKS, %rax movq (%rax,%rdi,8), %rsp +// switch to correct gdt +movq ap_gdt_ptr_64, %rax +movq (%rax,%rdi,8), %rax +lgdt (%rax) + +movw $0x10, %ax +movw %ax, %ds +movw %ax, %es +movw %ax, %fs +movw %ax, %gs +movw %ax, %ss + pushq $0 popfq @@ -118,6 +131,5 @@ pushq $0x8 pushq $kapentry lretq - 
.globl ap_bootstrap_end ap_bootstrap_end: diff --git a/kernel/core/cpu_instr.S b/kernel/core/cpu_instr.S index 2b5f773..4359a14 100644 --- a/kernel/core/cpu_instr.S +++ b/kernel/core/cpu_instr.S @@ -41,6 +41,11 @@ cpu_pause: pause ret +.globl cpu_wait_loop +cpu_wait_loop: +hlt +jmp cpu_wait_loop + .globl cpu_halt_loop cpu_halt_loop: cli diff --git a/kernel/core/exception_dispatch.c b/kernel/core/exception_dispatch.c index 14f4c0d..9b66fab 100644 --- a/kernel/core/exception_dispatch.c +++ b/kernel/core/exception_dispatch.c @@ -22,19 +22,19 @@ #include void exception_dispatch(struct exception_context_t* context) { - //TODO: handle from userland - + //TODO: remove reduntant rsp push logging_log_error("Unrecoverable exception 0x%lX (0x%lX) @ 0x%lX", context->vector, context->code, context->rip); logging_log_debug("Register Dump:\r\nrax 0x%lX\r\nrbx 0x%lX\r\nrcx 0x%lX\r\nrdx 0x%lX\ \r\nrsi 0x%lX\r\nrdi 0x%lX\r\nrbp 0x%lX\r\nrsp 0x%lX\r\nr8 0x%lX\r\nr9 0x%lX\r\nr10 0x%lX\ -\r\nr11 0x%lX\r\nr12 0x%lX\r\nr13 0x%lX\r\nr14 0x%lX\r\nr15 0x%lX\r\nrfl 0x%lX\r\ncs 0x%lX\r\nrip 0x%lX", +\r\nr11 0x%lX\r\nr12 0x%lX\r\nr13 0x%lX\r\nr14 0x%lX\r\nr15 0x%lX\r\nrfl 0x%lX\r\ncs 0x%lX\ +\r\nss 0x%lX\r\nrip 0x%lX", context->rax, context->rbx, context->rcx, context->rdx, context->rsi, context->rdi, context->rbp, context->rsp, context->r8, context->r9, context->r10, context->r11, context->r12, context->r13, context->r14, context->r15, - context->rflags, context->cs, context->rip); + context->rflags, context->cs, context->ss, context->rip); panic(PANIC_STATE); } diff --git a/kernel/core/exceptions.S b/kernel/core/exceptions.S index 7745c34..c683c9a 100644 --- a/kernel/core/exceptions.S +++ b/kernel/core/exceptions.S @@ -167,7 +167,6 @@ pushq %r13 pushq %r14 pushq %r15 pushq %rbp -pushq %rsp movq %rsp, %rdi // align stack movq %rsp, %rax diff --git a/kernel/core/kentry.c b/kernel/core/kentry.c index 55931c9..94777f3 100644 --- a/kernel/core/kentry.c +++ b/kernel/core/kentry.c @@ -27,6 +27,9 @@ 
#include #include #include +#include +#include +#include #include @@ -40,13 +43,18 @@ struct boot_context_t boot_context; extern uint8_t ap_bootstrap_start; +extern uint64_t* init_stacks; extern uint8_t ap_bootstrap_end; +extern volatile struct gdt_t(** ap_gdts)[GDT_NUM_ENTRIES]; +extern uint8_t* ap_init_locks; void kentry(void) { logging_log_debug("Kernel Entry"); logging_log_debug("TSS and IDT init"); - tss_init(); + tss_init((void*)paging_ident((uint64_t)boot_context.gdt)); + process_init(0); idt_init(); + scheduler_init(); logging_log_debug("TSS and IDT init done"); logging_log_debug("ACPI init"); @@ -82,10 +90,11 @@ void kentry(void) { logging_log_info("AP bootstrap sequence done"); - cpu_halt_loop(); + process_kill_current(); } void kapentry(uint64_t arb_id) { + lock_release(&ap_init_locks[arb_id]); logging_log_debug("AP %lx bootstrap complete", arb_id); proc_data_set_id((uint8_t)arb_id); @@ -94,7 +103,8 @@ void kapentry(uint64_t arb_id) { alloc_init(); logging_log_debug("AP TSS and IDT init"); - tss_init(); + tss_init(ap_gdts[proc_data_get()->arb_id]); + process_init_ap(init_stacks[arb_id]); idt_init_ap(); logging_log_debug("AP TSS and IDT init done"); @@ -108,5 +118,5 @@ void kapentry(uint64_t arb_id) { logging_log_info("AP init complete"); - cpu_halt_loop(); + process_kill_current(); } diff --git a/kernel/core/panic.c b/kernel/core/panic.c index d8ec34e..51c7274 100644 --- a/kernel/core/panic.c +++ b/kernel/core/panic.c @@ -20,6 +20,7 @@ #include #include #include +#include static const char* panic_names[] = { @@ -33,8 +34,8 @@ static const char* panic_names[] = { }; void panic(enum panic_code_t code) { - logging_log_error("Panic 0x%lX %s\r\nHalt", (uint64_t)code, + logging_log_error("Panic 0x%lX %s", (uint64_t)code, panic_names[code] ? 
panic_names[code] : panic_names[PANIC_UNK]); - cpu_halt_loop(); + process_kill_current(); } diff --git a/kernel/core/proc_data.S b/kernel/core/proc_data.S index 0fb3c27..0e75b7f 100644 --- a/kernel/core/proc_data.S +++ b/kernel/core/proc_data.S @@ -25,7 +25,7 @@ ret .globl proc_data_set_id proc_data_set_id: movq proc_data_ptr, %rax -movq (%rax,%rdi,8), %rax +leaq (%rax,%rdi,8), %rax movl $0xC0000101, %ecx movq %rax, %rdx shrq $32,%rdx diff --git a/kernel/core/process.c b/kernel/core/process.c new file mode 100644 index 0000000..df889eb --- /dev/null +++ b/kernel/core/process.c @@ -0,0 +1,144 @@ +/* process.c - kernel process manager */ +/* Copyright (C) 2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see
+*/
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define INIT_RFLG 0x200 /* initial RFLAGS for a new process: only bit 9 (IF) is set */
+#define INIT_STACK_SIZE 0x4000 /* bytes allocated for a new process stack */
+
+static uint64_t next_pid; /* next PID to hand out; incremented under lock_proc */
+static uint8_t lock_proc; /* taken by assign_pid() and process_kill_current() */
+
+void process_init(uint64_t init_rsp) { /* start PID allocation at 1, init lock_proc, then create this CPU's first PCB */
+	next_pid = 1;
+	lock_init(&lock_proc);
+	process_init_ap(init_rsp);
+}
+
+static uint64_t assign_pid(void) { /* return the current next_pid and advance it */
+	lock_acquire(&lock_proc);
+	const uint64_t ret = next_pid++;
+	lock_release(&lock_proc);
+	return ret;
+}
+
+uint64_t process_get_pid(void) { /* PID of the PCB recorded as current in this CPU's proc_data */
+	return proc_data_get()->current_process->pid;
+}
+
+void process_init_ap(uint64_t init_rsp) { /* create a PCB for the context already running on this CPU */
+	struct pcb_t* pcb = kmalloc(sizeof(struct pcb_t));
+	pcb->init_rsp = init_rsp; /* NOTE(review): scheduler_run() kfree()s this on SCHED_KILL, yet kapentry passes init_stacks[arb_id], stored as allocation + INIT_STACK_SIZE - confirm kfree() accepts that */
+	pcb->init_k_rsp = 0;
+	pcb->sched_cntr = SCHED_SKIP; /* scheduler_run() resumes a SCHED_SKIP PCB directly instead of queueing it */
+	proc_data_get()->current_process = pcb;
+	proc_data_get()->current_process->pid = assign_pid();
+}
+
+struct pcb_t* process_from_vaddr(uint64_t vaddr) { /* build a kernel-mode PCB that starts at vaddr; the PCB is returned, not queued */
+	struct pcb_t* pcb = kmalloc(sizeof(struct pcb_t));
+
+	pcb->rax = /* all general-purpose registers start at zero */
+		pcb->rbx =
+		pcb->rcx =
+		pcb->rdx =
+		pcb->rbp =
+		pcb->rsi =
+		pcb->rdi =
+		pcb->r8 =
+		pcb->r9 =
+		pcb->r10 =
+		pcb->r11 =
+		pcb->r12 =
+		pcb->r13 =
+		pcb->r14 =
+		pcb->r15 = 0;
+
+	pcb->rsp =
+		pcb->init_rsp = (uint64_t)kmalloc(INIT_STACK_SIZE); /* init_rsp keeps the allocation base for the later kfree() */
+	pcb->init_k_rsp = 0;
+	pcb->rsp += INIT_STACK_SIZE; /* initial rsp is the top of the new stack */
+	pcb->k_rsp_lo = 0;
+	pcb->k_rsp_hi = 0;
+
+	pcb->rflags = INIT_RFLG;
+
+	pcb->rip = vaddr;
+	pcb->cs = GDT_KERNEL_CS;
+	pcb->ss = GDT_KERNEL_SS;
+
+	pcb->sched_cntr = SCHED_READY;
+
+	pcb->pid = assign_pid();
+
+	return pcb;
+}
+
+void process_kill_current(void) { /* flag the current PCB SCHED_KILL, then sit in cpu_wait_loop() (hlt loop) */
+	lock_acquire(&lock_proc);
+	struct pcb_t* pcb = proc_data_get()->current_process;
+	pcb->sched_cntr = SCHED_KILL; /* NOTE(review): set while lock_proc is held; if lock_acquire() leaves interrupts enabled, a timer preemption here would drop this PCB with the lock still taken - confirm */
+	logging_log_debug("Killed %ld", pcb->pid);
+	lock_release(&lock_proc);
+
+	cpu_wait_loop();
+}
+
+void process_discard(struct pcb_t* pcb) { /* free the stack recorded in init_rsp, then the PCB itself */
+	kfree((void*)pcb->init_rsp);
+	kfree(pcb);
+}
+
+void process_preempt_entry(struct preempt_frame_t* context) { /* reached by jmp from apic_isr_timer; context is the frame that ISR left on the stack */
+	struct pcb_t* pcb = proc_data_get()->current_process;
+	pcb->rsp = context->rsp; /* copy the interrupted register state into the current PCB */
+	pcb->rbp = context->rbp;
+	pcb->r15 = context->r15;
+	pcb->r14 = context->r14;
+	pcb->r13 = context->r13;
+	pcb->r12 = context->r12;
+	pcb->r11 = context->r11;
+	pcb->r10 = context->r10;
+	pcb->r9 = context->r9;
+	pcb->r8 = context->r8;
+	pcb->rdi = context->rdi;
+	pcb->rsi = context->rsi;
+	pcb->rdx = context->rdx;
+	pcb->rcx = context->rcx;
+	pcb->rbx = context->rbx;
+	pcb->rax = context->rax;
+	pcb->rip = context->rip;
+	pcb->cs = context->cs;
+	pcb->rflags = context->rflags;
+	pcb->ss = context->ss;
+
+	apic_write_reg(APIC_REG_EOI, APIC_EOI); /* signal end-of-interrupt to the local APIC before switching */
+
+	scheduler_run(); /* selects a PCB and finishes in process_resume() */
+}
diff --git a/kernel/core/process_control.S b/kernel/core/process_control.S
new file mode 100644
index 0000000..63009cf
--- /dev/null
+++ b/kernel/core/process_control.S
@@ -0,0 +1,48 @@
+/* process_control.S - kernel low level process control routines */
+/* Copyright (C) 2026 Ebrahim Aleem
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.
If not, see
+*/
+
+.globl process_resume
+process_resume: // rdi = pointer to the saved context to resume (callers pass a struct pcb_t*); leaves via iretq
+movq 0x98(%rdi), %rax // push the five-quadword iretq frame in order: ss, rsp, rflags, cs, rip
+pushq %rax // ss
+movq 0x00(%rdi), %rax
+pushq %rax // rsp
+movq 0x90(%rdi), %rax
+pushq %rax // rflags
+movq 0x88(%rdi), %rax
+pushq %rax // cs
+movq 0x80(%rdi), %rax
+pushq %rax // rip
+
+movq 0x08(%rdi), %rbp // reload the general-purpose registers from the saved context
+movq 0x10(%rdi), %r15
+movq 0x18(%rdi), %r14
+movq 0x20(%rdi), %r13
+movq 0x28(%rdi), %r12
+movq 0x30(%rdi), %r11
+movq 0x38(%rdi), %r10
+movq 0x40(%rdi), %r9
+movq 0x48(%rdi), %r8
+movq 0x58(%rdi), %rsi
+movq 0x60(%rdi), %rdx
+movq 0x68(%rdi), %rcx
+movq 0x70(%rdi), %rbx
+movq 0x78(%rdi), %rax
+
+movq 0x50(%rdi), %rdi // rdi goes last: it was the base pointer for every load above
+
+iretq // pops the rip, cs, rflags, rsp, ss frame pushed at the top
diff --git a/kernel/core/scheduler.c b/kernel/core/scheduler.c
new file mode 100644
index 0000000..dcf7cf8
--- /dev/null
+++ b/kernel/core/scheduler.c
@@ -0,0 +1,88 @@
+/* scheduler.c - kernel scheduler */
+/* Copyright (C) 2026 Ebrahim Aleem
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.
If not, see
+*/
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static uint8_t lock_sched; /* guards active_queue and active_queue_tail */
+static volatile struct pcb_t* active_queue; /* head of the singly linked run queue, 0 when empty */
+static volatile struct pcb_t* active_queue_tail; /* last queued PCB, 0 when empty */
+
+void scheduler_schedule(struct pcb_t* pcb) { /* append pcb to the tail of the run queue */
+	pcb->next = 0;
+	lock_acquire(&lock_sched);
+	if (active_queue_tail) {
+		active_queue_tail->next = pcb;
+	}
+	else {
+		active_queue = pcb;
+	}
+	active_queue_tail = pcb; /* advance the tail on both paths, or later appends overwrite the same link */
+	lock_release(&lock_sched);
+}
+
+void scheduler_init(void) { /* initialise the queue lock and start with an empty run queue */
+	lock_init(&lock_sched);
+
+	active_queue = 0;
+	active_queue_tail = 0;
+}
+
+void scheduler_run(void) { /* dispose of or requeue the current PCB, then resume the next queued one */
+	struct proc_data_t* pd = proc_data_get();
+	struct pcb_t* current_pcb = pd->current_process;
+	switch (current_pcb->sched_cntr) {
+		case SCHED_SKIP: /* not queueable: go straight back to it */
+			process_resume(current_pcb);
+			break;
+		case SCHED_KILL: /* NOTE(review): frees both stacks but not the PCB itself (process_discard() does free it) - confirm */
+			kfree((void*)current_pcb->init_rsp);
+			kfree((void*)current_pcb->init_k_rsp);
+			break;
+		default: /* still runnable: back onto the tail of the queue */
+			scheduler_schedule(current_pcb);
+			break;
+	}
+
+	struct pcb_t* run = 0;
+	while (!run) { /* spin until some PCB is queued */
+		cpu_pause();
+		lock_acquire(&lock_sched);
+		if (active_queue) {
+			run = (struct pcb_t*)active_queue;
+			active_queue = active_queue->next;
+			if (!active_queue) {
+				active_queue_tail = 0; /* queue drained: clear the tail as well */
+			}
+			lock_release(&lock_sched);
+			break;
+		}
+		lock_release(&lock_sched);
+	}
+
+	pd->tss->rsp0_lo = run->k_rsp_lo; /* this CPU's TSS rsp0 takes the next PCB's k_rsp value */
+	pd->tss->rsp0_hi = run->k_rsp_hi;
+	pd->current_process = run;
+	process_resume(run);
+}
diff --git a/kernel/core/tss.c b/kernel/core/tss.c
index ddd39af..668d748 100644
--- a/kernel/core/tss.c
+++ b/kernel/core/tss.c
@@ -15,7 +15,6 @@
 * along with this program.
If not, see */ -#include "core/gdt.h" #include #include @@ -23,83 +22,54 @@ #include #include #include +#include +#include #include -#define RSP_SIZE 0x1000 #define IST_ABORT_SIZE 0x1000 +#define IST_SCHED_SIZE 0x1000 #define IST_LO_MASK 0xFFFFFFFF #define IST_HI_SHFT 32 -struct tss_t { - uint32_t resv0; - uint32_t rsp0_lo; - uint32_t rsp0_hi; - uint32_t rsp1_lo; - uint32_t rsp1_hi; - uint32_t rsp2_lo; - uint32_t rsp2_hi; - uint32_t resv1; - uint32_t resv2; - uint32_t ist1_lo; - uint32_t ist1_hi; - uint32_t ist2_lo; - uint32_t ist2_hi; - uint32_t ist3_lo; - uint32_t ist3_hi; - uint32_t ist4_lo; - uint32_t ist4_hi; - uint32_t ist5_lo; - uint32_t ist5_hi; - uint32_t ist6_lo; - uint32_t ist6_hi; - uint32_t ist7_lo; - uint32_t ist7_hi; - uint32_t resv3; - uint32_t resv4; - uint16_t resv5; - uint16_t iopb; -} __attribute__((packed)); - - -void tss_init(void) { +void tss_init(volatile struct gdt_t(* gdt)[GDT_NUM_ENTRIES]) { volatile struct tss_t* tss = kmalloc(sizeof(struct tss_t)); kmemset((void*)tss, 0, sizeof(struct tss_t)); - const uint64_t rsp0 = (uint64_t)kmalloc(RSP_SIZE); - tss->rsp0_lo = rsp0 & IST_LO_MASK; - tss->rsp0_hi = (uint32_t)(rsp0 >> IST_HI_SHFT); + proc_data_get()->tss = tss; /* recorded per CPU; scheduler_run() writes rsp0 through this pointer */ - //TODO: setup ists as needed - - const uint64_t ist1 = (uint64_t)kmalloc(IST_ABORT_SIZE); + const uint64_t ist1 = (uint64_t)kmalloc(IST_ABORT_SIZE) + IST_ABORT_SIZE; /* store the top of the allocation */ tss->ist1_lo = ist1 & IST_LO_MASK; tss->ist1_hi = (uint32_t)(ist1 >> IST_HI_SHFT); - logging_log_debug("New TSS @ 0x%lX - 0x%lX (rsp0) 0x%lX (ist1)", - (uint64_t)tss, rsp0, ist1); + const uint64_t ist2 = (uint64_t)kmalloc(IST_SCHED_SIZE) + IST_SCHED_SIZE; + tss->ist2_lo = ist2 & IST_LO_MASK; + tss->ist2_hi = (uint32_t)(ist2 >> IST_HI_SHFT); - ((struct gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->base0 = + logging_log_debug("New TSS @ 0x%lX - 0x%lX (ist1) 0x%lX (ist2)", + (uint64_t)tss, ist1, ist2); /* one argument per conversion: tss, ist1, ist2 */ + + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->base0 = (uint64_t)tss & GDT_BASE0_MASK; - ((struct 
gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->base1 = + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->base1 = ((uint64_t)tss & GDT_BASE1_MASK) >> GDT_BASE1_SHFT; - ((struct gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->base2 = + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->base2 = ((uint64_t)tss & GDT_BASE2_MASK) >> GDT_BASE2_SHFT; - ((struct gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->base3 = + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->base3 = (uint32_t)(((uint64_t)tss & GDT_BASE3_MASK) >> GDT_BASE3_SHFT); - ((struct gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->limit0 = + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->limit0 = (sizeof(*tss) - 1) & GDT_LIMIT0_MASK; - ((struct gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->flglmt1 = + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->flglmt1 = (((sizeof(*tss) - 1) & GDT_LIMIT1_MASK) >> GDT_LIMIT1_SHFT); - ((struct gdt_sys_t*)&(*boot_context.gdt)[GDT_TSS_INDEX])->access = + ((struct gdt_sys_t*)&(*gdt)[GDT_TSS_INDEX])->access = GDT_ACS_TSS | GDT_ACS_P; cpu_ltr_28(); diff --git a/kernel/include/core/cpu_instr.h b/kernel/include/core/cpu_instr.h index af0011f..e6b4908 100644 --- a/kernel/include/core/cpu_instr.h +++ b/kernel/include/core/cpu_instr.h @@ -30,6 +30,8 @@ extern void cpu_sti(void); extern void cpu_pause(void); +extern void cpu_wait_loop(void) __attribute__((noreturn)); + extern void cpu_halt_loop(void) __attribute__((noreturn)); #endif /* KERNEL_CORE_CPU_INSTR_H */ diff --git a/kernel/include/core/exception_dispatch.h b/kernel/include/core/exception_dispatch.h index 9f6f918..7b85533 100644 --- a/kernel/include/core/exception_dispatch.h +++ b/kernel/include/core/exception_dispatch.h @@ -21,7 +21,6 @@ #include struct exception_context_t { - uint64_t rsp; uint64_t rbp; uint64_t r15; uint64_t r14; @@ -42,8 +41,8 @@ struct exception_context_t { uint64_t rip; uint64_t cs; uint64_t rflags; - uint64_t inter_rsp; - uint64_t inter_ss; + uint64_t rsp; + uint64_t ss; } __attribute__((packed)); extern void 
exception_dispatch(struct exception_context_t* context) __attribute__((noreturn)); diff --git a/kernel/include/core/gdt.h b/kernel/include/core/gdt.h index e8e97bc..2a3fd75 100644 --- a/kernel/include/core/gdt.h +++ b/kernel/include/core/gdt.h @@ -20,7 +20,7 @@ #include -#define GDT_NUM_ENTRIES 6 +#define GDT_NUM_ENTRIES 7 #define GDT_ACS_TSS 0x09 #define GDT_ACS_P 0x80 @@ -43,6 +43,16 @@ #define GDT_UDATA_INDEX 4 #define GDT_TSS_INDEX 5 +#define GDT_KERNEL_CS 0x8 +#define GDT_KERNEL_SS 0x10 +#define GDT_USER_CS 0x18 +#define GDT_USER_SS 0x20 + +struct gdt_ptr_64_t { + uint16_t limit; + uint64_t addr; +} __attribute__((packed)); + struct gdt_t { uint16_t ign0; uint16_t ign1; @@ -50,7 +60,7 @@ struct gdt_t { uint8_t access; uint8_t flg; uint8_t ign3; -}; +} __attribute__((packed)); struct gdt_sys_t { uint16_t limit0; diff --git a/kernel/include/core/kentry.h b/kernel/include/core/kentry.h index 0f6f1c1..c8b8c29 100644 --- a/kernel/include/core/kentry.h +++ b/kernel/include/core/kentry.h @@ -25,6 +25,8 @@ #include +#define INIT_STACK_SIZE 0x4000 + #ifdef GRAPHICSBASE #include #endif /* GRAPHICSBASE */ diff --git a/kernel/include/core/proc_data.h b/kernel/include/core/proc_data.h index 01b82ae..65caf7a 100644 --- a/kernel/include/core/proc_data.h +++ b/kernel/include/core/proc_data.h @@ -21,10 +21,14 @@ #include #include +#include +#include struct proc_data_t { uint8_t arb_id; struct proc_alloc_caches_t alloc_caches; + volatile struct tss_t* tss; + struct pcb_t* current_process; }; extern struct proc_data_t bsp_proc_data; diff --git a/kernel/include/core/process.h b/kernel/include/core/process.h new file mode 100644 index 0000000..58e1156 --- /dev/null +++ b/kernel/include/core/process.h @@ -0,0 +1,105 @@ +/* process.h - kernel process interface */ +/* Copyright (C) 2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either 
version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see +*/ + +#ifndef KERNEL_CORE_PROCESS_H +#define KERNEL_CORE_PROCESS_H + +#include + +#include +/* Process control block. The leading register block (rsp through ss) sits at the fixed offsets annotated on each field; presumably low-level save/restore code addresses it by offset, so confirm before reordering. The remaining fields are bookkeeping. */ +struct pcb_t { + // order is important + uint64_t rsp; //0x00 + uint64_t rbp; //0x08 + uint64_t r15; //0x10 + uint64_t r14; //0x18 + uint64_t r13; //0x20 + uint64_t r12; //0x28 + uint64_t r11; //0x30 + uint64_t r10; //0x38 + uint64_t r9; //0x40 + uint64_t r8; //0x48 + uint64_t rdi; //0x50 + uint64_t rsi; //0x58 + uint64_t rdx; //0x60 + uint64_t rcx; //0x68 + uint64_t rbx; //0x70 + uint64_t rax; //0x78 + + uint64_t rip; //0x80 + uint64_t cs; //0x88 + uint64_t rflags; //0x90 + uint64_t ss; // 0x98 + + // end of important order + + uint64_t pid; + uint32_t k_rsp_lo; /* copied into the TSS rsp0_lo field by scheduler_run() */ + uint32_t k_rsp_hi; /* copied into the TSS rsp0_hi field by scheduler_run() */ + uint64_t init_rsp; /* passed to kfree() by scheduler_run() on SCHED_KILL */ + uint64_t init_k_rsp; /* passed to kfree() by scheduler_run() on SCHED_KILL */ + enum { + SCHED_READY, + SCHED_KILL, + SCHED_SKIP + } sched_cntr; /* read by scheduler_run(): SCHED_SKIP resumes the process, SCHED_KILL frees init_rsp and init_k_rsp, any other value requeues it */ + + struct pcb_t* next; /* link used by the scheduler run queue */ +}; +/* Packed register frame handed to process_preempt_entry(): rbp through rax, then rip, cs, rflags, rsp, ss. NOTE(review): the last five fields match the order of a 64-bit interrupt stack frame; confirm against the assembly entry stub. */ +struct preempt_frame_t { + uint64_t rbp; + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + uint64_t rdi; + uint64_t rsi; + uint64_t rdx; + uint64_t rcx; + uint64_t rbx; + uint64_t rax; + + uint64_t rip; + uint64_t cs; + uint64_t rflags; + uint64_t rsp; + uint64_t ss; +} __attribute__((packed)); + +extern uint64_t process_get_pid(void); + +extern void process_init(uint64_t init_rsp); + +extern void process_init_ap(uint64_t init_rsp); + +extern struct pcb_t* process_from_vaddr(uint64_t vaddr); +/* Declared noreturn; scheduler_run() calls it as its final step. */ +extern void process_resume(struct pcb_t* pcb) __attribute__((noreturn)); + +extern void process_kill_current(void) __attribute__((noreturn)); + +extern void 
process_discard(struct pcb_t* pcb); +/* Takes a pointer to the saved preempt_frame_t; declared noreturn. */ +extern void process_preempt_entry(struct preempt_frame_t* context) __attribute__((noreturn)); + +#endif /* KERNEL_CORE_PROCESS_H */ diff --git a/kernel/include/core/scheduler.h b/kernel/include/core/scheduler.h new file mode 100644 index 0000000..383cf40 --- /dev/null +++ b/kernel/include/core/scheduler.h @@ -0,0 +1,33 @@ +/* scheduler.h - kernel scheduler interface */ +/* Copyright (C) 2026 Ebrahim Aleem +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see +*/ + +#ifndef KERNEL_CORE_SCHEDULER_H +#define KERNEL_CORE_SCHEDULER_H + +#include + +#include +/* Initialise the scheduler lock and reset the run queue to empty. */ +extern void scheduler_init(void); +/* Declared noreturn; its definition is not visible alongside this header. */ +extern void scheduler_start(void) __attribute__((noreturn)); +/* Handle the current process according to its sched_cntr, then dequeue and resume the next pcb; declared noreturn. */ +extern void scheduler_run(void) __attribute__((noreturn)); +/* Queue pcb on the scheduler run queue. */ +extern void scheduler_schedule(struct pcb_t* pcb); + +#endif /* KERNEL_CORE_SCHEDULER_H */ diff --git a/kernel/include/core/tss.h b/kernel/include/core/tss.h index 6a7055e..f201afd 100644 --- a/kernel/include/core/tss.h +++ b/kernel/include/core/tss.h @@ -18,8 +18,41 @@ #ifndef KERNEL_CORE_TSS_H #define KERNEL_CORE_TSS_H +#include + #define IST_ABORT 1 +#define IST_SCHED 2 +/* Packed task-state segment layout. Each 64-bit stack pointer is stored as a lo/hi pair of 32-bit halves, which is how tss_init() fills the ist1 and ist2 fields. */ +struct tss_t { + uint32_t resv0; + uint32_t rsp0_lo; + uint32_t rsp0_hi; + uint32_t rsp1_lo; + uint32_t rsp1_hi; + uint32_t rsp2_lo; + uint32_t rsp2_hi; + uint32_t resv1; + uint32_t resv2; + uint32_t ist1_lo; + uint32_t ist1_hi; + uint32_t ist2_lo; + uint32_t ist2_hi; + uint32_t ist3_lo; + uint32_t ist3_hi; + uint32_t ist4_lo; + uint32_t ist4_hi; + uint32_t ist5_lo; + uint32_t ist5_hi; + uint32_t ist6_lo; + uint32_t ist6_hi; + uint32_t ist7_lo; + uint32_t ist7_hi; + uint32_t resv3; + uint32_t resv4; + uint16_t resv5; + uint16_t iopb; +} __attribute__((packed)); -extern void tss_init(void); +extern void tss_init(volatile struct gdt_t(* gdt)[GDT_NUM_ENTRIES]); /* gdt: pointer to the GDT array whose GDT_TSS_INDEX entry receives the TSS descriptor */ #endif /* KERNEL_CORE_TSS_H */