/*
 * Copyright (c) 2018 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include "xuk-config.h"
#include "shared-page.h"
#include "x86_64-hw.h"

#ifdef CONFIG_XUK_DEBUG
#include "printf.h"
#include "vgacon.h"
#include "serial.h"
#else
int printf(const char *fmt, ...)
{
	return 0;
}
#endif

/* This i386 code stub is designed to link internally (i.e. it shares
 * nothing with the 64 bit world) and be loaded into RAM in high
 * memory (generally at 0x100000) in a single (R/W/X) block with its
 * .text, .rodata, .data and .bss included. Its stack lives in the
 * fifth page of memory at 0x4000-0x4fff. After finishing 64 bit
 * initialization, it will JMP to the 16-byte-aligned address that
 * immediately follows this block in memory (exposed by the linker as
 * _start64), which should then be able to run in an environment where
 * all of physical RAM is mapped, except for the bottom 16kb.
 *
 * Memory layout on exit:
 *
 * + Pages 0-3 are an unmapped NULL guard
 * + Page 4 contains the stack and bss for the setup code, and a GDT.
 *   After 64 bit setup, it's likely this will be reused.
 * + Pages 5-11 are the bootstrap page table
 *
 * Note that the initial page table makes no attempt to identify
 * memory regions. Everything in the first 4G is mapped as cacheable
 * RAM. MMIO drivers will need to remap their memory based on PCI BAR
 * regions or whatever.
 */

/* Cute trick to turn a preprocessor macro containing a number literal
 * into a string immediate in gcc basic asm context
 */
#define _ASM_IMM(s) #s
#define ASM_IMM(s) "$" _ASM_IMM(s)

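/* For illustration (the real value lives in shared-page.h; 0xaabbccdd
 * is a made-up stand-in): if BOOT_MAGIC_STUB16 were defined as
 * 0xaabbccdd, then ASM_IMM(BOOT_MAGIC_STUB16) expands to
 * "$" "0xaabbccdd", which string-pastes into the immediate operand
 * "$0xaabbccdd". The _ASM_IMM indirection matters: it lets the
 * argument macro-expand before the # operator stringifies it.
 */
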
/* Entry point, to be linked at the very start of the image. Set a
 * known-good stack (either the top of the shared page for the boot
 * CPU, or one provided by stub16 on others), push the multiboot
 * arguments in EAX, EBX and call into C code.
 */
__asm__(".pushsection .start32\n"
|
|
" mov $0x5000, %esp\n"
|
|
" xor %edx, %edx\n"
|
|
" cmp " ASM_IMM(BOOT_MAGIC_STUB16) ", %eax\n"
|
|
" cmove 0x4000(%edx), %esp\n"
|
|
" pushl %ebx\n"
|
|
" pushl %eax\n"
|
|
" call cstart\n"
|
|
".popsection\n");
|
|
|
|
/* The multiboot header can be anywhere in the first 4k of the file.
 * This stub doesn't get that big, so we don't bother with special
 * linkage.
 */
#define MULTIBOOT_MAGIC 0x1badb002
#define MULTIBOOT_FLAGS (1<<1) /* 2nd bit is "want memory map" */
const int multiboot_header[] = {
	MULTIBOOT_MAGIC,
	MULTIBOOT_FLAGS,
	-(MULTIBOOT_MAGIC + MULTIBOOT_FLAGS), /* csum: -(magic+flags) */
};

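/* Worked out, the checksum makes the three words sum to zero mod
 * 2^32: 0x1badb002 + 0x00000002 = 0x1badb004, and
 * -(0x1badb004) = 0xe4524ffc, so 0x1badb004 + 0xe4524ffc == 0 (mod
 * 2^32), which is the relation a multiboot loader verifies.
 */
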
/* Creates and returns a generic/sane page table for 64 bit startup
 * (64 bit mode requires paging enabled). All of the bottom 4G
 * (whether backing memory is present or not) gets a mapping with 2M
 * pages, except that the bottom 2M are mapped with 4k pages and leave
 * the first four pages unmapped as a NULL guard.
 *
 * Makes no attempt to identify non-RAM/MMIO regions; it just maps
 * everything. We rely on the firmware to have set up MTRRs for us
 * where needed; otherwise everything will be treated as cacheable
 * memory.
 */
void *init_page_tables(void)
{
	/* Top level PML4E points to a single PDPTE in its first entry */
	struct pte64 *pml4e = alloc_page(1);
	struct pte64 *pdpte = alloc_page(1);

	pml4e[0].addr = (unsigned long)pdpte;
	pml4e[0].present = 1;
	pml4e[0].writable = 1;

	/* The PDPTE has four entries covering the first 4G of memory,
	 * each pointing to a PDE
	 */
	for (unsigned int gb = 0; gb < 4; gb++) {
		struct pte64 *pde = alloc_page(0);

		pdpte[gb].addr = (unsigned long)pde;
		pdpte[gb].present = 1;
		pdpte[gb].writable = 1;

		/* Each PDE filled with 2M supervisor pages */
		for (int i = 0; i < 512; i++) {
			if (!(gb == 0U && i == 0)) {
				pde[i].addr = (gb << 30) | (i << 21);
				pde[i].present = 1;
				pde[i].writable = 1;
				pde[i].pagesize_pat = 1;
			} else {
				/* EXCEPT the very first entry of the
				 * first GB, which is a pointer to a
				 * PTE of 4k pages so that we can have
				 * a 16k (4-page) NULL guard unmapped.
				 */
				struct pte64 *pte = alloc_page(0);

				pde[0].addr = (unsigned long)pte;
				pde[0].present = 1;
				pde[0].writable = 1;

				for (int j = 0; j < 512; j++) {
					if (j < 4) {
						pte[j].addr = 0;
					} else {
						pte[j].addr = j << 12;
						pte[j].present = 1;
						pte[j].writable = 1;
					}
				}
			}
		}
	}

	/* Flush caches out of paranoia. In theory, x86 page walking
	 * happens downstream of the system-coherent dcache and this
	 * isn't needed.
	 */
	__asm__ volatile("wbinvd");
	return pml4e;
}

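/* Two notes on the code above, both assumptions rather than quotes
 * from x86_64-hw.h:
 *
 * A pte64 along these lines is assumed, with the flag bitfields
 * overlaying the always-zero low bits of a page-aligned address via a
 * union, which is why ".addr = ptr" and ".present = 1" can both be
 * assigned directly:
 *
 *   struct pte64 {
 *       union {
 *           unsigned long long addr;   // page-aligned, low 12 bits clear
 *           struct {
 *               unsigned long long present : 1;       // bit 0
 *               unsigned long long writable : 1;      // bit 1
 *               unsigned long long _flags2_6 : 5;     // bits 2-6
 *               unsigned long long pagesize_pat : 1;  // bit 7 (PS/PAT)
 *           };
 *       };
 *   };
 *
 * And a worked example of the identity mapping: physical address
 * 0x40200000 has PDPT index (bits 38:30) = 1 and PD index (bits
 * 29:21) = 1, so the walk goes pdpte[1] -> pde[1], whose 2M page was
 * set to (1 << 30) | (1 << 21) = 0x40200000, i.e. virtual == physical.
 */
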
#ifdef CONFIG_XUK_DEBUG
void putchar(int c)
{
	serial_putc(c);
	vgacon_putc(c);
}
#endif

void cstart(unsigned int magic, unsigned int arg)
{
	if (magic == BOOT_MAGIC_STUB16) {
		printf("SMP CPU up in 32 bit protected mode. Stack ~%xh\n",
		       (int)&magic);
	}

	if (magic != BOOT_MAGIC_STUB16) {
		shared_init();
#ifdef CONFIG_XUK_DEBUG
		serial_init();
		z_putchar = putchar;
#endif

		printf("Entering stub32 on boot cpu, magic %xh stack ~%xh\n",
		       magic, (int)&magic);
	}

	/* The multiboot memory map turns out not to be very useful.
	 * The basic numbers logged here are only a subset of the true
	 * memory map if it has holes or >4G memory, and the full map
	 * passed in the second argument tends to live in low memory
	 * and get easily clobbered by our own muckery. If we care
	 * about reading memory maps at runtime we probably want to be
	 * using BIOS e820 like Linux does.
	 */
	if (magic == BOOT_MAGIC_MULTIBOOT) {
		printf("Magic: %p MBI Addr: %p\n", (void *)magic, (void *)arg);

		int mem_lower = *(int *)(arg + 4);
		int mem_upper = *(int *)(arg + 8);
		int mmap_length = *(int *)(arg + 44);
		int *mmap_addr = *(void **)(arg + 48);

		printf("mem lower %d upper %d mmap_len %d mmap_addr %p\n",
		       mem_lower, mem_upper, mmap_length, mmap_addr);
	}

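	/* The raw offsets above follow the standard multiboot_info
	 * layout; a struct equivalent, shown for illustration only
	 * (this stub deliberately doesn't declare one):
	 *
	 *   struct multiboot_info {
	 *       unsigned int flags;        // +0
	 *       unsigned int mem_lower;    // +4, KB of RAM below 1M
	 *       unsigned int mem_upper;    // +8, KB of RAM above 1M
	 *       // ... +12 through +43: fields unused here ...
	 *       unsigned int mmap_length;  // +44
	 *       unsigned int mmap_addr;    // +48
	 *   };
	 */
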
	/* Choose a stack pointer and CPU ID for the 64 bit code to
	 * use. Then if we're not the boot CPU, release the spinlock
	 * (taken in stub16) so the other CPUs can continue.
	 */
	int cpu_id = 0;
	unsigned int init_stack = 0x5000;

	if (magic == BOOT_MAGIC_STUB16) {
		cpu_id = _shared.num_active_cpus++;
		init_stack = _shared.smpinit_stack;
		_shared.smpinit_stack = 0U;

		/* The handoff values above must be visible before the
		 * lock word clears, hence the memory clobber
		 */
		__asm__ volatile("movl $0, %0"
				 : "=m"(_shared.smpinit_lock) : : "memory");
	}

	/* Page table goes in CR3. This is a noop until paging is
	 * enabled later
	 */
	if (magic != BOOT_MAGIC_STUB16) {
		_shared.base_cr3 = (unsigned int)init_page_tables();
	}
	SET_CR("cr3", _shared.base_cr3);

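	/* SET_CR() above and SET_CR_BIT() below come from
	 * x86_64-hw.h. Their definitions are assumptions here, but
	 * the usage implies wrappers along these lines:
	 *
	 *   #define SET_CR(reg, val) \
	 *       __asm__ volatile("movl %0, %%" reg : : "r"(val))
	 *
	 * with SET_CR_BIT() presumably reading the register back the
	 * same way, ORing in (1 << bit), and rewriting it.
	 */
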
	/* Enable the PAE bit (5) in CR4, required because in long mode
	 * we'll be using the 64 bit page entry format. Likewise a
	 * noop until the CPU starts loading pages.
	 */
	SET_CR_BIT("cr4", 5);

	/* Set LME (long mode enable) in IA32_EFER. Still not a mode
	 * transition, simply tells the CPU that, once paging is
	 * enabled, we should enter long mode. At that point the LMA
	 * bit (10) will be set to indicate that it's active.
	 */
	const int MSR_IA32_EFER = 0xc0000080;

	set_msr_bit(MSR_IA32_EFER, 8);

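	/* set_msr_bit() is provided by x86_64-hw.h; a minimal sketch
	 * of what it is assumed to do (read-modify-write via
	 * rdmsr/wrmsr, for bit positions below 32):
	 *
	 *   unsigned int lo, hi;
	 *   __asm__ volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
	 *   lo |= 1U << bit;
	 *   __asm__ volatile("wrmsr" : : "a"(lo), "d"(hi), "c"(msr));
	 */
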
	/* NOW we transition by turning paging on. The CPU will start
	 * page translation (which has been carefully
	 * identity-mapped!) and enter the 32 bit compatibility
	 * submode of long mode. So we're reading 64 bit page tables
	 * but still executing 32 bit instructions.
	 */
	SET_CR_BIT("cr0", 31);

	printf("Hello memory mapped world!\n");

	/* Now we can enter true 64 bit long mode via a far call to a
	 * code segment with the 64 bit flag set. Allocate a 2-entry
	 * GDT (entry 0 is always a "null segment" architecturally and
	 * can't be used) here on the stack and throw it away after
	 * the jump. The 64 bit OS code will need to set the
	 * descriptors up for itself anyway.
	 */
	struct gdt64 cs[] = {
		{ },
		{
			.readable = 1,
			.codeseg = 1,
			.notsystem = 1,
			.present = 1,
			.long64 = 1,
		},
	};

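	/* Assuming the gdt64 bitfields in x86_64-hw.h follow the
	 * standard descriptor encoding (access byte at bits 40-47, L
	 * bit at 53), cs[1] works out to the canonical 64 bit code
	 * descriptor 0x00209a0000000000: readable|codeseg|notsystem|
	 * present form the 0x9a access byte, long64 contributes the
	 * 0x0020 in the high dword, and base/limit stay zero because
	 * long mode ignores them. The printf below should log those
	 * two dwords.
	 */
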
	/* The limit comes first, but is 16 bits. The dummy is there
	 * for alignment, though docs aren't clear on whether it's
	 * required or not.
	 */
	struct {
		unsigned short dummy;
		unsigned short limit;
		unsigned int addr;
	} gdtp = {
		/* architecturally, limit is the last valid byte offset */
		.limit = sizeof(cs) - 1,
		.addr = (int)&cs[0],
	};

	printf("CS descriptor 0x%x 0x%x\n", cs[1].dwords[1], cs[1].dwords[0]);
	__asm__ volatile("lgdt %0" : : "m"(gdtp.limit) : "memory");

	/* Finally, make a far jump into the 64 bit world. The entry
	 * point is a 16-byte-aligned address that immediately follows
	 * our stub, and is exposed by our linkage as "_start64".
	 *
	 * Indirect far jumps have a similar crazy setup to descriptor
	 * tables, but here the segment selector comes last so no
	 * alignment worries.
	 *
	 * The 64 bit entry reuses the same stack we're on, and takes
	 * the cpu_id in its first argument.
	 */
	extern int _start64;
	unsigned int jmpaddr = (unsigned int) &_start64;
	struct {
		unsigned int addr;
		unsigned short segment;
	} farjmp = { .segment = GDT_SELECTOR(1), .addr = jmpaddr };

	printf("Making far jump to 64 bit mode @%xh...\n", (int)&_start64);
	__asm__ volatile("mov %0, %%esp; ljmp *%1" ::
			 "r"(init_stack), "m"(farjmp), "D"(cpu_id)
			 : "memory");
}