|
|
|
/* Copyright 2023 The ChromiumOS Authors
|
|
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <zephyr/devicetree.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <kernel_internal.h>
|
|
|
|
|
|
|
|
/* Linker-emitted symbol marking the end of the SRAM actually linked
 * into this image; memory from here to SRAM_END is unlinked/shared.
 */
extern char _mtk_adsp_sram_end[];

/* SRAM region boundaries, taken from the devicetree sram0 node */
#define SRAM_START DT_REG_ADDR(DT_NODELABEL(sram0))
#define SRAM_SIZE DT_REG_SIZE(DT_NODELABEL(sram0))
#define SRAM_END (SRAM_START + SRAM_SIZE)

/* Linker-emitted symbol marking the end of the DRAM actually linked
 * into this image; memory from here to DRAM_END is unlinked/shared.
 */
extern char _mtk_adsp_dram_end[];

/* DRAM region boundaries, taken from the devicetree dram0 node */
#define DRAM_START DT_REG_ADDR(DT_NODELABEL(dram0))
#define DRAM_SIZE DT_REG_SIZE(DT_NODELABEL(dram0))
#define DRAM_END (DRAM_START + DRAM_SIZE)
|
|
|
|
|
|
|
|
/* This is the true boot vector. This device allows for direct
 * setting of the alternate reset vector, so we let it link wherever
 * it lands and extract its address in the loader. This represents
 * the minimum amount of effort required to successfully call a C
 * function (and duplicates a few versions elsewhere in the tree:
 * really this should move to the arch layer).
 *
 * Steps: set PS to a known state (windowed ABI enabled, exceptions
 * masked, interrupt level 15), reset the register-window state so
 * the window rotation starts from a clean slate, give a1 (the
 * Xtensa windowed-ABI stack pointer) an initial value, and call
 * into C.
 */
__asm__(".align 4\n\t"
	".global mtk_adsp_boot_entry\n\t"
	"mtk_adsp_boot_entry:\n\t"
	" movi a0, 0x4002f\n\t" /* WOE|EXCM|INTLVL=15 */
	" wsr a0, PS\n\t"
	/* WINDOWBASE=0, WINDOWSTART=1: only window 0 live */
	" movi a0, 0\n\t"
	" wsr a0, WINDOWBASE\n\t"
	" movi a0, 1\n\t"
	" wsr a0, WINDOWSTART\n\t"
	" rsync\n\t"
	/* Initial stack pointer; NOTE(review): presumably a
	 * known-good address inside SRAM — confirm against the
	 * devicetree sram0 node for this SoC.
	 */
	" movi a1, 0x40040000\n\t"
	" call4 c_boot\n\t");
|
|
|
|
|
|
|
|
/* Initial MPU configuration, needed to enable caching */
static void enable_mpu(void)
{
	/* Note: we set the linked/in-use-by-zephyr regions of both
	 * SRAM and DRAM cached for performance. The remainder is
	 * left uncached, as it's likely to be shared with the host
	 * and/or DMA. This seems like a good default choice pending
	 * proper MPU integration
	 *
	 * Table format: { segment start address, attribute word }.
	 * Each entry covers memory from its start address up to the
	 * next entry's start.  NOTE(review): the attribute words
	 * (0x06000 / 0x06f00 / 0xf7f00) appear to encode access
	 * rights and memory type (inaccessible / uncached-rw /
	 * cached-rwx per the comments) — confirm the bit layout
	 * against the Xtensa MPU documentation.
	 */
	static const uint32_t mpu[][2] = {
		{ 0x00000000, 0x06000 }, /* inaccessible null region */
		{ 0x10000000, 0x06f00 }, /* MMIO registers */
		{ 0x1d000000, 0x06000 }, /* inaccessible */
		{ SRAM_START, 0xf7f00 }, /* cached SRAM */
		{ (uint32_t)&_mtk_adsp_sram_end, 0x06f00 }, /* uncached SRAM */
		{ SRAM_END, 0x06000 }, /* inaccessible */
		{ DRAM_START, 0xf7f00 }, /* cached DRAM */
		{ (uint32_t)&_mtk_adsp_dram_end, 0x06f00 }, /* uncached DRAM */
		{ DRAM_END, 0x06000 }, /* inaccessible top of mem */
	};

	/* Must write BACKWARDS FROM THE END to avoid introducing a
	 * non-monotonic segment at the current instruction fetch. The
	 * exception triggers even if all the segments involved are
	 * disabled!
	 */
	int32_t nseg = ARRAY_SIZE(mpu);

	/* Entries land in the top nseg MPU registers (index 31 down),
	 * written highest-first per the constraint above.
	 */
	for (int32_t i = 31; i >= 32 - nseg; i--) {
		int32_t mpuidx = i - (32 - nseg);
		/* Low bit of the address word set: NOTE(review) —
		 * presumably the segment-enable bit; confirm against
		 * the Xtensa MPU register format.
		 */
		uint32_t addren = mpu[mpuidx][0] | 1;
		/* Attribute word ORed with the target register index */
		uint32_t segprot = (mpu[mpuidx][1]) | i;

		/* If an active pipelined instruction fetch is in the
		 * same segment, wptlb must be preceded by a memw in
		 * the same cache line. Jumping to an aligned-by-8
		 * address ensures that the following two (3-byte)
		 * instructions are in the same 8 byte-aligned region.
		 */
		__asm__ volatile(" j 1f\n"
				 ".align 8\n"
				 "1:\n"
				 " memw\n"
				 " wptlb %1, %0"
				 :: "r"(addren), "r"(segprot));
	}
}
|
|
|
|
|
|
|
|
/* Temporary console output, pending integration of a winstream
 * backend. This simply appends a null-terminated string to an
 * otherwise unused 1M region of shared DRAM (it's a hole in the SOF
 * memory map before the DMA memory, so untouched by existing audio
 * firmware), making early debugging much easier: it can be read
 * directly out of /dev/mem (with e.g. dd | hexdump) and survives
 * device resets/panics/etc. But it doesn't handle more than 1M of
 * output, there's no way to detect a reset of the stream, and in fact
 * it's actually racy with device startup as if you read too early
 * you'll see the old run and not the new one. And it's wasteful,
 * even if this device has a ton of usably-mapped DRAM
 *
 * Also note that the storage for the buffer and length value get
 * reset by the DRAM clear near the end of c_boot(). If you want to
 * use this for extremely early logging you'll need to stub out the
 * dram clear and also set buf[0] to 0 manually (as it isn't affected
 * by device reset).
 *
 * Layout: bytes [0, max) hold the character stream; the 4-byte
 * length word lives at buf[max] (the last 4 bytes of the 1M region).
 *
 * @param c Character to append
 * @return Always 0 (character "consumed"), per the printk hook contract
 */
int arch_printk_char_out(int c)
{
	char volatile * const buf = (void *)0x60700000;
	const size_t max = 0x100000 - 4;
	int volatile * const len = (int *)&buf[max];

	/* Bound is max - 1, not max: the terminator below is written
	 * at index *len + 1, and with *len == max - 1 that write
	 * would land on buf[max] — the low byte of the length word
	 * itself — corrupting the stream length.  Also reject a
	 * negative (garbage) length explicitly rather than relying on
	 * the implicit signed-to-unsigned conversion to do it.
	 */
	if (*len >= 0 && (size_t)*len < max - 1) {
		/* Terminator first, then the character, so a host
		 * reading concurrently never sees an unterminated
		 * string.
		 */
		buf[*len + 1] = 0;
		buf[(*len)++] = c;
	}
	return 0;
}
|
|
|
|
|
|
|
|
/* Early C entry point, called from the mtk_adsp_boot_entry asm stub
 * above.  Sequencing here is deliberate: clear .bss before touching
 * any static state, program the MPU before enabling caches via
 * MEMCTL, and install VECBASE before anything that could fault.
 * Ends by entering the Zephyr kernel via z_prep_c().
 */
void c_boot(void)
{
	extern char _bss_start, _bss_end, z_xtensa_vecbase; /* Linker-emitted */
	uint32_t memctl = 0xffffff00; /* enable all caches */

	/* Clear bss before doing anything else, device memory is
	 * persistent across resets (!) and we'd like our static
	 * variables to be actually zero. Do this without using
	 * memset() out of pedantry (because we don't know which libc is
	 * in use or whether it requires statics).
	 */
	for (char *p = &_bss_start; p < &_bss_end; p++) {
		*p = 0;
	}

	/* Set up MPU memory regions, both for protection and to
	 * enable caching (the hardware defaults is "uncached rwx
	 * memory everywhere").
	 */
	enable_mpu();

	/* But the CPU core won't actually use the cache without MEMCTL... */
	__asm__ volatile("wsr %0, MEMCTL; rsync" :: "r"(memctl));

	/* Need the vector base set to receive exceptions and
	 * interrupts (including register window exceptions, meaning
	 * we can't make C function calls until this is done!)
	 */
	__asm__ volatile("wsr %0, VECBASE; rsync" :: "r"(&z_xtensa_vecbase));

	/* Bring up the core clock.  Defined elsewhere in this SoC
	 * port; NOTE(review): presumably programs the DSP PLL/clock
	 * dividers — see its definition.
	 */
	mtk_adsp_cpu_freq_init();

	/* Likewise, memory power is external to the device, and the
	 * kernel SOF loader doesn't zero it, so zero our unlinked
	 * memory to prevent possible pollution from previous runs.
	 * This region is uncached, no need to flush.
	 */
	memset(_mtk_adsp_sram_end, 0, SRAM_END - (uint32_t)&_mtk_adsp_sram_end);
	memset(_mtk_adsp_dram_end, 0, DRAM_END - (uint32_t)&_mtk_adsp_dram_end);

	/* Clear pending interrupts. Note that this hardware has a
	 * habit of starting with all its timer interrupts flagged.
	 * These have to be cleared by writing to the equivalent
	 * CCOMPAREn register. Assumes XCHAL_NUM_TIMERS == 3...
	 */
	uint32_t val = 0;

	__asm__ volatile("wsr %0, CCOMPARE0" :: "r"(val));
	__asm__ volatile("wsr %0, CCOMPARE1" :: "r"(val));
	__asm__ volatile("wsr %0, CCOMPARE2" :: "r"(val));
	__ASSERT_NO_MSG(XCHAL_NUM_TIMERS == 3);
	val = 0xffffffff;
	__asm__ volatile("wsr %0, INTCLEAR" :: "r"(val));

	/* Hand off to Zephyr's architecture-independent early init */
	z_prep_c();
}
|