zephyr/include/arch/xtensa/cache.h

/*
 * Copyright 2021 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */
#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_

#include <xtensa/config/core-isa.h>
#include <toolchain.h>
#include <sys/util.h>

#ifdef __cplusplus
extern "C" {
#endif

#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)

#define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0))

#if XCHAL_DCACHE_SIZE
BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
#endif

static ALWAYS_INLINE void z_xtensa_cache_flush(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwb %0, 0" :: "r"(line));
	}
#endif
}

static ALWAYS_INLINE void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwbi %0, 0" :: "r"(line));
	}
#endif
}

static ALWAYS_INLINE void z_xtensa_cache_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhi %0, 0" :: "r"(line));
	}
#endif
}

static ALWAYS_INLINE void z_xtensa_cache_inv_all(void)
{
	z_xtensa_cache_inv(NULL, Z_DCACHE_MAX);
}

static ALWAYS_INLINE void z_xtensa_cache_flush_all(void)
{
	z_xtensa_cache_flush(NULL, Z_DCACHE_MAX);
}

static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
{
	z_xtensa_cache_flush_inv(NULL, Z_DCACHE_MAX);
}

#ifdef CONFIG_ARCH_HAS_COHERENCE
static inline bool arch_mem_coherent(void *ptr)
{
	size_t addr = (size_t) ptr;

	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
}
#endif

static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
{
	/* The math here is all compile-time: when the two regions
	 * differ by a power of two, we can convert between them by
	 * setting or clearing just one bit.  Otherwise it needs two
	 * operations.
	 */
	uint32_t rxor = (rto ^ rfrom) << 29;

	rto <<= 29;
	if (Z_IS_POW2(rxor)) {
		if ((rxor & rto) == 0) {
			return addr & ~rxor;
		} else {
			return addr | rxor;
		}
	} else {
		return (addr & ~(7U << 29)) | rto;
	}
}

/**
 * @brief Return cached pointer to a RAM address
 *
 * The Xtensa coherence architecture maps addressable RAM twice, in
 * two different 512MB regions whose L1 cache settings can be
 * controlled independently.  So for any given pointer, it is possible
 * to convert it to and from a cached version.
 *
 * This function takes a pointer to any addressible object (either in
 * cacheable memory or not) and returns a pointer that can be used to
 * refer to the same memory through the L1 data cache.  Data read
 * through the resulting pointer will reflect locally cached values on
 * the current CPU if they exist, and writes will go first into the
 * cache and be written back later.
 *
 * @see arch_xtensa_uncached_ptr()
 *
 * @param ptr A pointer to a valid C object
 * @return A pointer to the same object via the L1 dcache
 */
static inline void *arch_xtensa_cached_ptr(void *ptr)
{
	return (void *)z_xtrpoflip((uint32_t) ptr,
				   CONFIG_XTENSA_CACHED_REGION,
				   CONFIG_XTENSA_UNCACHED_REGION);
}

/**
 * @brief Return uncached pointer to a RAM address
 *
 * The Xtensa coherence architecture maps addressable RAM twice, in
 * two different 512MB regions whose L1 cache settings can be
 * controlled independently.  So for any given pointer, it is possible
 * to convert it to and from a cached version.
 *
 * This function takes a pointer to any addressible object (either in
 * cacheable memory or not) and returns a pointer that can be used to
 * refer to the same memory while bypassing the L1 data cache.  Data
 * in the L1 cache will not be inspected nor modified by the access.
 *
 * @see arch_xtensa_cached_ptr()
 *
 * @param ptr A pointer to a valid C object
 * @return A pointer to the same object bypassing the L1 dcache
 */
static inline void *arch_xtensa_uncached_ptr(void *ptr)
{
	return (void *)z_xtrpoflip((uint32_t) ptr,
				   CONFIG_XTENSA_UNCACHED_REGION,
				   CONFIG_XTENSA_CACHED_REGION);
}

/* Utility to generate an unrolled and optimal[1] code sequence to set
 * the RPO TLB registers (contra the HAL cacheattr macros, which
 * generate larger code and can't be called from C), based on the
 * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
 * for regions (including MMIO registers in region zero) which want to
 * bypass L1, "4" for the cached region which wants writeback, and
 * "15" (invalid) elsewhere.
 *
 * Note that on cores that have the "translation" option set, we need
 * to put an identity mapping in the high bits.  Also per spec
 * changing the current code region (by definition cached) requires
 * that WITLB be followed by an ISYNC and that both instructions live
 * in the same cache line (two 3-byte instructions fit in an 8-byte
 * aligned region, so that's guaranteed not to cross a cache line
 * boundary).
 *
 * [1] With the sole exception of gcc's infuriating insistence on
 * emitting a precomputed literal for addr + addrincr instead of
 * computing it with a single ADD instruction from values it already
 * has in registers.  Explicitly assigning the variables to registers
 * via an attribute works, but then emits needless MOV instructions
 * instead.  I tell myself it's just 32 bytes of .text, but... Sigh.
 */
#define _REGION_ATTR(r)						\
	((r) == 0 ? 2 :						\
	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))

#define _SET_ONE_TLB(region) do {				\
	uint32_t attr = _REGION_ATTR(region);			\
	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
		attr |= addr; /* RPO with translation */	\
	}							\
	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
				 :: "r"(attr), "r"(addr));	\
	} else {						\
		__asm__ volatile("wdtlb %0, %1"			\
				 :: "r"(attr), "r"(addr));	\
		__asm__ volatile("j 1f; .align 8; 1:");		\
		__asm__ volatile("witlb %0, %1; isync"		\
				 :: "r"(attr), "r"(addr));	\
	}							\
	addr += addrincr;					\
} while (0)

#define ARCH_XTENSA_SET_RPO_TLB() do {				\
	register uint32_t addr = 0, addrincr = 0x20000000;	\
	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
} while (0)

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */