/*
 * Copyright (c) 2020 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Routines for managing virtual address spaces
 */
#include <stdint.h>
#include <kernel_arch_interface.h>
#include <zephyr/spinlock.h>
#include <mmu.h>
#include <zephyr/init.h>
#include <kernel_internal.h>
#include <zephyr/internal/syscall_handler.h>
#include <zephyr/toolchain.h>
#include <zephyr/linker/linker-defs.h>
#include <zephyr/sys/bitarray.h>
#include <zephyr/sys/check.h>
#include <zephyr/sys/math_extras.h>
#include <zephyr/timing/timing.h>
#include <zephyr/logging/log.h>
LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);

#ifdef CONFIG_DEMAND_PAGING
#include <zephyr/kernel/mm/demand_paging.h>
#endif /* CONFIG_DEMAND_PAGING */

/*
 * General terminology:
 * - A page frame is a page-sized physical memory region in RAM. It is a
 *   container where a data page may be placed. It is always referred to by
 *   physical address. We have a convention of using uintptr_t for physical
 *   addresses. We instantiate a struct k_mem_page_frame to store metadata for
 *   every page frame.
 *
 * - A data page is a page-sized region of data. It may exist in a page frame,
 *   or be paged out to some backing store. Its location can always be looked
 *   up in the CPU's page tables (or equivalent) by virtual address.
 *   The data type will always be void * or in some cases uint8_t * when we
 *   want to do pointer arithmetic.
 */

/* Spinlock to protect any globals in this file and serialize page table
 * updates in arch code
 */
struct k_spinlock z_mm_lock;

/*
 * General page frame management
 */

/* Database of all RAM page frames */
struct k_mem_page_frame k_mem_page_frames[K_MEM_NUM_PAGE_FRAMES];

#if __ASSERT_ON
/* Indicator that k_mem_page_frames has been initialized, many of these APIs do
 * not work before POST_KERNEL
 */
static bool page_frames_initialized;
#endif

/* Add colors to page table dumps to indicate mapping type */
#define COLOR_PAGE_FRAMES 1

#if COLOR_PAGE_FRAMES
#define ANSI_DEFAULT "\x1B" "[0m"
#define ANSI_RED     "\x1B" "[1;31m"
#define ANSI_GREEN   "\x1B" "[1;32m"
#define ANSI_YELLOW  "\x1B" "[1;33m"
#define ANSI_BLUE    "\x1B" "[1;34m"
#define ANSI_MAGENTA "\x1B" "[1;35m"
#define ANSI_CYAN    "\x1B" "[1;36m"
#define ANSI_GREY    "\x1B" "[1;90m"

#define COLOR(x) printk(_CONCAT(ANSI_, x))
#else
#define COLOR(x) do { } while (false)
#endif /* COLOR_PAGE_FRAMES */

/* LCOV_EXCL_START */
static void page_frame_dump(struct k_mem_page_frame *pf)
{
	if (k_mem_page_frame_is_free(pf)) {
		COLOR(GREY);
		printk("-");
	} else if (k_mem_page_frame_is_reserved(pf)) {
		COLOR(CYAN);
		printk("R");
	} else if (k_mem_page_frame_is_busy(pf)) {
		COLOR(MAGENTA);
		printk("B");
	} else if (k_mem_page_frame_is_pinned(pf)) {
		COLOR(YELLOW);
		printk("P");
	} else if (k_mem_page_frame_is_available(pf)) {
		COLOR(GREY);
		printk(".");
	} else if (k_mem_page_frame_is_mapped(pf)) {
		COLOR(DEFAULT);
		printk("M");
	} else {
		COLOR(RED);
		printk("?");
	}
}

void k_mem_page_frames_dump(void)
{
	int column = 0;

	__ASSERT(page_frames_initialized, "%s called too early", __func__);
	printk("Physical memory from 0x%lx to 0x%lx\n",
	       K_MEM_PHYS_RAM_START, K_MEM_PHYS_RAM_END);

	for (int i = 0; i < K_MEM_NUM_PAGE_FRAMES; i++) {
		struct k_mem_page_frame *pf = &k_mem_page_frames[i];

		page_frame_dump(pf);

		column++;
		if (column == 64) {
			column = 0;
			printk("\n");
		}
	}

	COLOR(DEFAULT);
	if (column != 0) {
		printk("\n");
	}
}
/* LCOV_EXCL_STOP */

#define VIRT_FOREACH(_base, _size, _pos) \
	for ((_pos) = (_base); \
	     (_pos) < ((uint8_t *)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)

#define PHYS_FOREACH(_base, _size, _pos) \
	for ((_pos) = (_base); \
	     (_pos) < ((uintptr_t)(_base) + (_size)); (_pos) += CONFIG_MMU_PAGE_SIZE)


/*
 * Virtual address space management
 *
 * Call all of these functions with z_mm_lock held.
 *
 * Overall virtual memory map: When the kernel starts, it resides in
 * virtual memory in the region K_MEM_KERNEL_VIRT_START to
 * K_MEM_KERNEL_VIRT_END.
Unused virtual memory past this, up to the limit * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings. * * If CONFIG_ARCH_MAPS_ALL_RAM is set, we do not just map the kernel image, * but have a mapping for all RAM in place. This is for special architectural * purposes and does not otherwise affect page frame accounting or flags; * the only guarantee is that such RAM mapping outside of the Zephyr image * won't be disturbed by subsequent memory mapping calls. * * +--------------+ <- K_MEM_VIRT_RAM_START * | Undefined VM | <- May contain ancillary regions like x86_64's locore * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START) * | Mapping for | * | main kernel | * | image | * | | * | | * +--------------+ <- K_MEM_VM_FREE_START * | | * | Unused, | * | Available VM | * | | * |..............| <- mapping_pos (grows downward as more mappings are made) * | Mapping | * +--------------+ * | Mapping | * +--------------+ * | ... | * +--------------+ * | Mapping | * +--------------+ <- mappings start here * | Reserved | <- special purpose virtual page(s) of size K_MEM_VM_RESERVED * +--------------+ <- K_MEM_VIRT_RAM_END */ /* Bitmap of virtual addresses where one bit corresponds to one page. * This is being used for virt_region_alloc() to figure out which * region of virtual addresses can be used for memory mapping. * * Note that bit #0 is the highest address so that allocation is * done in reverse from highest address. */ SYS_BITARRAY_DEFINE_STATIC(virt_region_bitmap, CONFIG_KERNEL_VM_SIZE / CONFIG_MMU_PAGE_SIZE); static bool virt_region_inited; #define Z_VIRT_REGION_START_ADDR K_MEM_VM_FREE_START #define Z_VIRT_REGION_END_ADDR (K_MEM_VIRT_RAM_END - K_MEM_VM_RESERVED) static inline uintptr_t virt_from_bitmap_offset(size_t offset, size_t size) { return POINTER_TO_UINT(K_MEM_VIRT_RAM_END) - (offset * CONFIG_MMU_PAGE_SIZE) - size; } static inline size_t virt_to_bitmap_offset(void *vaddr, size_t size) { return (POINTER_TO_UINT(K_MEM_VIRT_RAM_END) - POINTER_TO_UINT(vaddr) - size) / CONFIG_MMU_PAGE_SIZE; } static void virt_region_init(void) { size_t offset, num_bits; /* There are regions where we should never map via * k_mem_map() and k_mem_map_phys_bare(). Mark them as * already allocated so they will never be used. */ if (K_MEM_VM_RESERVED > 0) { /* Mark reserved region at end of virtual address space */ num_bits = K_MEM_VM_RESERVED / CONFIG_MMU_PAGE_SIZE; (void)sys_bitarray_set_region(&virt_region_bitmap, num_bits, 0); } /* Mark all bits up to Z_FREE_VM_START as allocated */ num_bits = POINTER_TO_UINT(K_MEM_VM_FREE_START) - POINTER_TO_UINT(K_MEM_VIRT_RAM_START); offset = virt_to_bitmap_offset(K_MEM_VIRT_RAM_START, num_bits); num_bits /= CONFIG_MMU_PAGE_SIZE; (void)sys_bitarray_set_region(&virt_region_bitmap, num_bits, offset); virt_region_inited = true; } static void virt_region_free(void *vaddr, size_t size) { size_t offset, num_bits; uint8_t *vaddr_u8 = (uint8_t *)vaddr; if (unlikely(!virt_region_inited)) { virt_region_init(); } #ifndef CONFIG_KERNEL_DIRECT_MAP /* Without the need to support K_MEM_DIRECT_MAP, the region must be * able to be represented in the bitmap. So this case is * simple. 
*/ __ASSERT((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR), "invalid virtual address region %p (%zu)", vaddr_u8, size); if (!((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) { return; } offset = virt_to_bitmap_offset(vaddr, size); num_bits = size / CONFIG_MMU_PAGE_SIZE; (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset); #else /* !CONFIG_KERNEL_DIRECT_MAP */ /* With K_MEM_DIRECT_MAP, the region can be outside of the virtual * memory space, wholly within it, or overlap partially. * So additional processing is needed to make sure we only * mark the pages within the bitmap. */ if (((vaddr_u8 >= Z_VIRT_REGION_START_ADDR) && (vaddr_u8 < Z_VIRT_REGION_END_ADDR)) || (((vaddr_u8 + size - 1) >= Z_VIRT_REGION_START_ADDR) && ((vaddr_u8 + size - 1) < Z_VIRT_REGION_END_ADDR))) { uint8_t *adjusted_start = MAX(vaddr_u8, Z_VIRT_REGION_START_ADDR); uint8_t *adjusted_end = MIN(vaddr_u8 + size, Z_VIRT_REGION_END_ADDR); size_t adjusted_sz = adjusted_end - adjusted_start; offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz); num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE; (void)sys_bitarray_free(&virt_region_bitmap, num_bits, offset); } #endif /* !CONFIG_KERNEL_DIRECT_MAP */ } static void *virt_region_alloc(size_t size, size_t align) { uintptr_t dest_addr; size_t alloc_size; size_t offset; size_t num_bits; int ret; if (unlikely(!virt_region_inited)) { virt_region_init(); } /* Possibly request more pages to ensure we can get an aligned virtual address */ num_bits = (size + align - CONFIG_MMU_PAGE_SIZE) / CONFIG_MMU_PAGE_SIZE; alloc_size = num_bits * CONFIG_MMU_PAGE_SIZE; ret = sys_bitarray_alloc(&virt_region_bitmap, num_bits, &offset); if (ret != 0) { LOG_ERR("insufficient virtual address space (requested %zu)", size); return NULL; } /* Remember that bit #0 in bitmap corresponds to the highest * virtual address. So here we need to go downwards (backwards?) * to get the starting address of the allocated region. */ dest_addr = virt_from_bitmap_offset(offset, alloc_size); if (alloc_size > size) { uintptr_t aligned_dest_addr = ROUND_UP(dest_addr, align); /* Here is the memory organization when trying to get an aligned * virtual address: * * +--------------+ <- K_MEM_VIRT_RAM_START * | Undefined VM | * +--------------+ <- K_MEM_KERNEL_VIRT_START (often == K_MEM_VIRT_RAM_START) * | Mapping for | * | main kernel | * | image | * | | * | | * +--------------+ <- K_MEM_VM_FREE_START * | ... | * +==============+ <- dest_addr * | Unused | * |..............| <- aligned_dest_addr * | | * | Aligned | * | Mapping | * | | * |..............| <- aligned_dest_addr + size * | Unused | * +==============+ <- offset from K_MEM_VIRT_RAM_END == dest_addr + alloc_size * | ... | * +--------------+ * | Mapping | * +--------------+ * | Reserved | * +--------------+ <- K_MEM_VIRT_RAM_END */ /* Free the two unused regions */ virt_region_free(UINT_TO_POINTER(dest_addr), aligned_dest_addr - dest_addr); if (((dest_addr + alloc_size) - (aligned_dest_addr + size)) > 0) { virt_region_free(UINT_TO_POINTER(aligned_dest_addr + size), (dest_addr + alloc_size) - (aligned_dest_addr + size)); } dest_addr = aligned_dest_addr; } /* Need to make sure this does not step into kernel memory */ if (dest_addr < POINTER_TO_UINT(Z_VIRT_REGION_START_ADDR)) { (void)sys_bitarray_free(&virt_region_bitmap, size, offset); return NULL; } return UINT_TO_POINTER(dest_addr); } /* * Free page frames management * * Call all of these functions with z_mm_lock held. 
*/ /* Linked list of unused and available page frames. * * TODO: This is very simple and treats all free page frames as being equal. * However, there are use-cases to consolidate free pages such that entire * SRAM banks can be switched off to save power, and so obtaining free pages * may require a more complex ontology which prefers page frames in RAM banks * which are still active. * * This implies in the future there may be multiple slists managing physical * pages. Each page frame will still just have one snode link. */ static sys_sflist_t free_page_frame_list; /* Number of unused and available free page frames. * This information may go stale immediately. */ static size_t z_free_page_count; #define PF_ASSERT(pf, expr, fmt, ...) \ __ASSERT(expr, "page frame 0x%lx: " fmt, k_mem_page_frame_to_phys(pf), \ ##__VA_ARGS__) /* Get an unused page frame. don't care which one, or NULL if there are none */ static struct k_mem_page_frame *free_page_frame_list_get(void) { sys_sfnode_t *node; struct k_mem_page_frame *pf = NULL; node = sys_sflist_get(&free_page_frame_list); if (node != NULL) { z_free_page_count--; pf = CONTAINER_OF(node, struct k_mem_page_frame, node); PF_ASSERT(pf, k_mem_page_frame_is_free(pf), "on free list but not free"); pf->va_and_flags = 0; } return pf; } /* Release a page frame back into the list of free pages */ static void free_page_frame_list_put(struct k_mem_page_frame *pf) { PF_ASSERT(pf, k_mem_page_frame_is_available(pf), "unavailable page put on free list"); sys_sfnode_init(&pf->node, K_MEM_PAGE_FRAME_FREE); sys_sflist_append(&free_page_frame_list, &pf->node); z_free_page_count++; } static void free_page_frame_list_init(void) { sys_sflist_init(&free_page_frame_list); } static void page_frame_free_locked(struct k_mem_page_frame *pf) { pf->va_and_flags = 0; free_page_frame_list_put(pf); } /* * Memory Mapping */ /* Called after the frame is mapped in the arch layer, to update our * local ontology (and do some assertions while we're at it) */ static void frame_mapped_set(struct k_mem_page_frame *pf, void *addr) { PF_ASSERT(pf, !k_mem_page_frame_is_free(pf), "attempted to map a page frame on the free list"); PF_ASSERT(pf, !k_mem_page_frame_is_reserved(pf), "attempted to map a reserved page frame"); /* We do allow multiple mappings for pinned page frames * since we will never need to reverse map them. * This is uncommon, use-cases are for things like the * Zephyr equivalent of VSDOs */ PF_ASSERT(pf, !k_mem_page_frame_is_mapped(pf) || k_mem_page_frame_is_pinned(pf), "non-pinned and already mapped to %p", k_mem_page_frame_to_virt(pf)); uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1; uintptr_t va = (uintptr_t)addr & ~flags_mask; pf->va_and_flags &= flags_mask; pf->va_and_flags |= va | K_MEM_PAGE_FRAME_MAPPED; } /* LCOV_EXCL_START */ /* Go through page frames to find the physical address mapped * by a virtual address. * * @param[in] virt Virtual Address * @param[out] phys Physical address mapped to the input virtual address * if such mapping exists. 
* * @retval 0 if mapping is found and valid * @retval -EFAULT if virtual address is not mapped */ static int virt_to_page_frame(void *virt, uintptr_t *phys) { uintptr_t paddr; struct k_mem_page_frame *pf; int ret = -EFAULT; K_MEM_PAGE_FRAME_FOREACH(paddr, pf) { if (k_mem_page_frame_is_mapped(pf)) { if (virt == k_mem_page_frame_to_virt(pf)) { ret = 0; if (phys != NULL) { *phys = k_mem_page_frame_to_phys(pf); } break; } } } return ret; } /* LCOV_EXCL_STOP */ __weak FUNC_ALIAS(virt_to_page_frame, arch_page_phys_get, int); #ifdef CONFIG_DEMAND_PAGING static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr, bool page_in, uintptr_t *location_ptr); static inline void do_backing_store_page_in(uintptr_t location); static inline void do_backing_store_page_out(uintptr_t location); #endif /* CONFIG_DEMAND_PAGING */ /* Allocate a free page frame, and map it to a specified virtual address * * TODO: Add optional support for copy-on-write mappings to a zero page instead * of allocating, in which case page frames will be allocated lazily as * the mappings to the zero page get touched. This will avoid expensive * page-ins as memory is mapped and physical RAM or backing store storage will * not be used if the mapped memory is unused. The cost is an empty physical * page of zeroes. */ static int map_anon_page(void *addr, uint32_t flags) { struct k_mem_page_frame *pf; uintptr_t phys; bool lock = (flags & K_MEM_MAP_LOCK) != 0U; pf = free_page_frame_list_get(); if (pf == NULL) { #ifdef CONFIG_DEMAND_PAGING uintptr_t location; bool dirty; int ret; pf = k_mem_paging_eviction_select(&dirty); __ASSERT(pf != NULL, "failed to get a page frame"); LOG_DBG("evicting %p at 0x%lx", k_mem_page_frame_to_virt(pf), k_mem_page_frame_to_phys(pf)); ret = page_frame_prepare_locked(pf, &dirty, false, &location); if (ret != 0) { return -ENOMEM; } if (dirty) { do_backing_store_page_out(location); } pf->va_and_flags = 0; #else return -ENOMEM; #endif /* CONFIG_DEMAND_PAGING */ } phys = k_mem_page_frame_to_phys(pf); arch_mem_map(addr, phys, CONFIG_MMU_PAGE_SIZE, flags); if (lock) { k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); } frame_mapped_set(pf, addr); #ifdef CONFIG_DEMAND_PAGING if (!lock) { k_mem_paging_eviction_add(pf); } #endif LOG_DBG("memory mapping anon page %p -> 0x%lx", addr, phys); return 0; } void *k_mem_map_phys_guard(uintptr_t phys, size_t size, uint32_t flags, bool is_anon) { uint8_t *dst; size_t total_size; int ret; k_spinlock_key_t key; uint8_t *pos; bool uninit = (flags & K_MEM_MAP_UNINIT) != 0U; __ASSERT(!is_anon || (is_anon && page_frames_initialized), "%s called too early", __func__); __ASSERT((flags & K_MEM_CACHE_MASK) == 0U, "%s does not support explicit cache settings", __func__); if (((flags & K_MEM_PERM_USER) != 0U) && ((flags & K_MEM_MAP_UNINIT) != 0U)) { LOG_ERR("user access to anonymous uninitialized pages is forbidden"); return NULL; } if ((size % CONFIG_MMU_PAGE_SIZE) != 0U) { LOG_ERR("unaligned size %zu passed to %s", size, __func__); return NULL; } if (size == 0) { LOG_ERR("zero sized memory mapping"); return NULL; } /* Need extra for the guard pages (before and after) which we * won't map. */ if (size_add_overflow(size, CONFIG_MMU_PAGE_SIZE * 2, &total_size)) { LOG_ERR("too large size %zu passed to %s", size, __func__); return NULL; } key = k_spin_lock(&z_mm_lock); dst = virt_region_alloc(total_size, CONFIG_MMU_PAGE_SIZE); if (dst == NULL) { /* Address space has no free region */ goto out; } /* Unmap both guard pages to make sure accessing them * will generate fault. 
*/ arch_mem_unmap(dst, CONFIG_MMU_PAGE_SIZE); arch_mem_unmap(dst + CONFIG_MMU_PAGE_SIZE + size, CONFIG_MMU_PAGE_SIZE); /* Skip over the "before" guard page in returned address. */ dst += CONFIG_MMU_PAGE_SIZE; if (is_anon) { /* Mapping from anonymous memory */ flags |= K_MEM_CACHE_WB; #ifdef CONFIG_DEMAND_MAPPING if ((flags & K_MEM_MAP_LOCK) == 0) { flags |= K_MEM_MAP_UNPAGED; VIRT_FOREACH(dst, size, pos) { arch_mem_map(pos, uninit ? ARCH_UNPAGED_ANON_UNINIT : ARCH_UNPAGED_ANON_ZERO, CONFIG_MMU_PAGE_SIZE, flags); } LOG_DBG("memory mapping anon pages %p to %p unpaged", dst, pos-1); /* skip the memset() below */ uninit = true; } else #endif { VIRT_FOREACH(dst, size, pos) { ret = map_anon_page(pos, flags); if (ret != 0) { /* TODO: * call k_mem_unmap(dst, pos - dst) * when implemented in #28990 and * release any guard virtual page as well. */ dst = NULL; goto out; } } } } else { /* Mapping known physical memory. * * arch_mem_map() is a void function and does not return * anything. Arch code usually uses ASSERT() to catch * mapping errors. Assume this works correctly for now. */ arch_mem_map(dst, phys, size, flags); } out: k_spin_unlock(&z_mm_lock, key); if (dst != NULL && !uninit) { /* If we later implement mappings to a copy-on-write * zero page, won't need this step */ memset(dst, 0, size); } return dst; } void k_mem_unmap_phys_guard(void *addr, size_t size, bool is_anon) { uintptr_t phys; uint8_t *pos; struct k_mem_page_frame *pf; k_spinlock_key_t key; size_t total_size; int ret; /* Need space for the "before" guard page */ __ASSERT_NO_MSG(POINTER_TO_UINT(addr) >= CONFIG_MMU_PAGE_SIZE); /* Make sure address range is still valid after accounting * for two guard pages. */ pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE; k_mem_assert_virtual_region(pos, size + (CONFIG_MMU_PAGE_SIZE * 2)); key = k_spin_lock(&z_mm_lock); /* Check if both guard pages are unmapped. * Bail if not, as this is probably a region not mapped * using k_mem_map(). */ pos = addr; ret = arch_page_phys_get(pos - CONFIG_MMU_PAGE_SIZE, NULL); if (ret == 0) { __ASSERT(ret == 0, "%s: cannot find preceding guard page for (%p, %zu)", __func__, addr, size); goto out; } ret = arch_page_phys_get(pos + size, NULL); if (ret == 0) { __ASSERT(ret == 0, "%s: cannot find succeeding guard page for (%p, %zu)", __func__, addr, size); goto out; } if (is_anon) { /* Unmapping anonymous memory */ VIRT_FOREACH(addr, size, pos) { #ifdef CONFIG_DEMAND_PAGING enum arch_page_location status; uintptr_t location; status = arch_page_location_get(pos, &location); switch (status) { case ARCH_PAGE_LOCATION_PAGED_OUT: /* * No pf is associated with this mapping. * Simply get rid of the MMU entry and free * corresponding backing store. */ arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE); k_mem_paging_backing_store_location_free(location); continue; case ARCH_PAGE_LOCATION_PAGED_IN: /* * The page is in memory but it may not be * accessible in order to manage tracking * of the ARCH_DATA_PAGE_ACCESSED flag * meaning arch_page_phys_get() could fail. * Still, we know the actual phys address. */ phys = location; ret = 0; break; default: ret = arch_page_phys_get(pos, &phys); break; } #else ret = arch_page_phys_get(pos, &phys); #endif __ASSERT(ret == 0, "%s: cannot unmap an unmapped address %p", __func__, pos); if (ret != 0) { /* Found an address not mapped. Do not continue. 
*/ goto out; } __ASSERT(k_mem_is_page_frame(phys), "%s: 0x%lx is not a page frame", __func__, phys); if (!k_mem_is_page_frame(phys)) { /* Physical address has no corresponding page frame * description in the page frame array. * This should not happen. Do not continue. */ goto out; } /* Grab the corresponding page frame from physical address */ pf = k_mem_phys_to_page_frame(phys); __ASSERT(k_mem_page_frame_is_mapped(pf), "%s: 0x%lx is not a mapped page frame", __func__, phys); if (!k_mem_page_frame_is_mapped(pf)) { /* Page frame is not marked mapped. * This should not happen. Do not continue. */ goto out; } arch_mem_unmap(pos, CONFIG_MMU_PAGE_SIZE); #ifdef CONFIG_DEMAND_PAGING if (!k_mem_page_frame_is_pinned(pf)) { k_mem_paging_eviction_remove(pf); } #endif /* Put the page frame back into free list */ page_frame_free_locked(pf); } } else { /* * Unmapping previous mapped memory with specific physical address. * * Note that we don't have to unmap the guard pages, as they should * have been unmapped. We just need to unmapped the in-between * region [addr, (addr + size)). */ arch_mem_unmap(addr, size); } /* There are guard pages just before and after the mapped * region. So we also need to free them from the bitmap. */ pos = (uint8_t *)addr - CONFIG_MMU_PAGE_SIZE; total_size = size + (CONFIG_MMU_PAGE_SIZE * 2); virt_region_free(pos, total_size); out: k_spin_unlock(&z_mm_lock, key); } int k_mem_update_flags(void *addr, size_t size, uint32_t flags) { uintptr_t phys; k_spinlock_key_t key; int ret; k_mem_assert_virtual_region(addr, size); key = k_spin_lock(&z_mm_lock); /* * We can achieve desired result without explicit architecture support * by unmapping and remapping the same physical memory using new flags. */ ret = arch_page_phys_get(addr, &phys); if (ret < 0) { goto out; } /* TODO: detect and handle paged-out memory as well */ arch_mem_unmap(addr, size); arch_mem_map(addr, phys, size, flags); out: k_spin_unlock(&z_mm_lock, key); return ret; } size_t k_mem_free_get(void) { size_t ret; k_spinlock_key_t key; __ASSERT(page_frames_initialized, "%s called too early", __func__); key = k_spin_lock(&z_mm_lock); #ifdef CONFIG_DEMAND_PAGING if (z_free_page_count > CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE) { ret = z_free_page_count - CONFIG_DEMAND_PAGING_PAGE_FRAMES_RESERVE; } else { ret = 0; } #else ret = z_free_page_count; #endif /* CONFIG_DEMAND_PAGING */ k_spin_unlock(&z_mm_lock, key); return ret * (size_t)CONFIG_MMU_PAGE_SIZE; } /* Get the default virtual region alignment, here the default MMU page size * * @param[in] phys Physical address of region to be mapped, aligned to MMU_PAGE_SIZE * @param[in] size Size of region to be mapped, aligned to MMU_PAGE_SIZE * * @retval alignment to apply on the virtual address of this region */ static size_t virt_region_align(uintptr_t phys, size_t size) { ARG_UNUSED(phys); ARG_UNUSED(size); return CONFIG_MMU_PAGE_SIZE; } __weak FUNC_ALIAS(virt_region_align, arch_virt_region_align, size_t); /* This may be called from arch early boot code before z_cstart() is invoked. * Data will be copied and BSS zeroed, but this must not rely on any * initialization functions being called prior to work correctly. 
*/ void k_mem_map_phys_bare(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags) { uintptr_t aligned_phys, addr_offset; size_t aligned_size, align_boundary; k_spinlock_key_t key; uint8_t *dest_addr; size_t num_bits; size_t offset; #ifndef CONFIG_KERNEL_DIRECT_MAP __ASSERT(!(flags & K_MEM_DIRECT_MAP), "The direct-map is not enabled"); #endif /* CONFIG_KERNEL_DIRECT_MAP */ addr_offset = k_mem_region_align(&aligned_phys, &aligned_size, phys, size, CONFIG_MMU_PAGE_SIZE); __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_phys); __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)), "wraparound for physical address 0x%lx (size %zu)", aligned_phys, aligned_size); align_boundary = arch_virt_region_align(aligned_phys, aligned_size); key = k_spin_lock(&z_mm_lock); if (IS_ENABLED(CONFIG_KERNEL_DIRECT_MAP) && (flags & K_MEM_DIRECT_MAP)) { dest_addr = (uint8_t *)aligned_phys; /* Mark the region of virtual memory bitmap as used * if the region overlaps the virtual memory space. * * Basically if either end of region is within * virtual memory space, we need to mark the bits. */ if (IN_RANGE(aligned_phys, (uintptr_t)K_MEM_VIRT_RAM_START, (uintptr_t)(K_MEM_VIRT_RAM_END - 1)) || IN_RANGE(aligned_phys + aligned_size - 1, (uintptr_t)K_MEM_VIRT_RAM_START, (uintptr_t)(K_MEM_VIRT_RAM_END - 1))) { uint8_t *adjusted_start = MAX(dest_addr, K_MEM_VIRT_RAM_START); uint8_t *adjusted_end = MIN(dest_addr + aligned_size, K_MEM_VIRT_RAM_END); size_t adjusted_sz = adjusted_end - adjusted_start; num_bits = adjusted_sz / CONFIG_MMU_PAGE_SIZE; offset = virt_to_bitmap_offset(adjusted_start, adjusted_sz); if (sys_bitarray_test_and_set_region( &virt_region_bitmap, num_bits, offset, true)) { goto fail; } } } else { /* Obtain an appropriately sized chunk of virtual memory */ dest_addr = virt_region_alloc(aligned_size, align_boundary); if (!dest_addr) { goto fail; } } /* If this fails there's something amiss with virt_region_get */ __ASSERT((uintptr_t)dest_addr < ((uintptr_t)dest_addr + (size - 1)), "wraparound for virtual address %p (size %zu)", dest_addr, size); LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr, aligned_phys, aligned_size, flags, addr_offset); arch_mem_map(dest_addr, aligned_phys, aligned_size, flags); k_spin_unlock(&z_mm_lock, key); *virt_ptr = dest_addr + addr_offset; return; fail: /* May re-visit this in the future, but for now running out of * virtual address space or failing the arch_mem_map() call is * an unrecoverable situation. * * Other problems not related to resource exhaustion we leave as * assertions since they are clearly programming mistakes. 
*/ LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed", phys, size, flags); k_panic(); } void k_mem_unmap_phys_bare(uint8_t *virt, size_t size) { uintptr_t aligned_virt, addr_offset; size_t aligned_size; k_spinlock_key_t key; addr_offset = k_mem_region_align(&aligned_virt, &aligned_size, POINTER_TO_UINT(virt), size, CONFIG_MMU_PAGE_SIZE); __ASSERT(aligned_size != 0U, "0-length mapping at 0x%lx", aligned_virt); __ASSERT(aligned_virt < (aligned_virt + (aligned_size - 1)), "wraparound for virtual address 0x%lx (size %zu)", aligned_virt, aligned_size); key = k_spin_lock(&z_mm_lock); LOG_DBG("arch_mem_unmap(0x%lx, %zu) offset %lu", aligned_virt, aligned_size, addr_offset); arch_mem_unmap(UINT_TO_POINTER(aligned_virt), aligned_size); virt_region_free(UINT_TO_POINTER(aligned_virt), aligned_size); k_spin_unlock(&z_mm_lock, key); } /* * Miscellaneous */ size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size, uintptr_t addr, size_t size, size_t align) { size_t addr_offset; /* The actual mapped region must be page-aligned. Round down the * physical address and pad the region size appropriately */ *aligned_addr = ROUND_DOWN(addr, align); addr_offset = addr - *aligned_addr; *aligned_size = ROUND_UP(size + addr_offset, align); return addr_offset; } #if defined(CONFIG_LINKER_USE_BOOT_SECTION) || defined(CONFIG_LINKER_USE_PINNED_SECTION) static void mark_linker_section_pinned(void *start_addr, void *end_addr, bool pin) { struct k_mem_page_frame *pf; uint8_t *addr; uintptr_t pinned_start = ROUND_DOWN(POINTER_TO_UINT(start_addr), CONFIG_MMU_PAGE_SIZE); uintptr_t pinned_end = ROUND_UP(POINTER_TO_UINT(end_addr), CONFIG_MMU_PAGE_SIZE); size_t pinned_size = pinned_end - pinned_start; VIRT_FOREACH(UINT_TO_POINTER(pinned_start), pinned_size, addr) { pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr)); frame_mapped_set(pf, addr); if (pin) { k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); } else { k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED); #ifdef CONFIG_DEMAND_PAGING if (k_mem_page_frame_is_evictable(pf)) { k_mem_paging_eviction_add(pf); } #endif } } } #endif /* CONFIG_LINKER_USE_BOOT_SECTION) || CONFIG_LINKER_USE_PINNED_SECTION */ #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION static void z_paging_ondemand_section_map(void) { uint8_t *addr; size_t size; uintptr_t location; uint32_t flags; size = (uintptr_t)lnkr_ondemand_text_size; flags = K_MEM_MAP_UNPAGED | K_MEM_PERM_EXEC | K_MEM_CACHE_WB; VIRT_FOREACH(lnkr_ondemand_text_start, size, addr) { k_mem_paging_backing_store_location_query(addr, &location); arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags); sys_bitarray_set_region(&virt_region_bitmap, 1, virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE)); } size = (uintptr_t)lnkr_ondemand_rodata_size; flags = K_MEM_MAP_UNPAGED | K_MEM_CACHE_WB; VIRT_FOREACH(lnkr_ondemand_rodata_start, size, addr) { k_mem_paging_backing_store_location_query(addr, &location); arch_mem_map(addr, location, CONFIG_MMU_PAGE_SIZE, flags); sys_bitarray_set_region(&virt_region_bitmap, 1, virt_to_bitmap_offset(addr, CONFIG_MMU_PAGE_SIZE)); } } #endif /* CONFIG_LINKER_USE_ONDEMAND_SECTION */ void z_mem_manage_init(void) { uintptr_t phys; uint8_t *addr; struct k_mem_page_frame *pf; k_spinlock_key_t key = k_spin_lock(&z_mm_lock); free_page_frame_list_init(); ARG_UNUSED(addr); #ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES /* If some page frames are unavailable for use as memory, arch * code will mark K_MEM_PAGE_FRAME_RESERVED in their flags */ arch_reserved_pages_update(); #endif /* 
CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */ #ifdef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT /* All pages composing the Zephyr image are mapped at boot in a * predictable way. This can change at runtime. */ VIRT_FOREACH(K_MEM_KERNEL_VIRT_START, K_MEM_KERNEL_VIRT_SIZE, addr) { pf = k_mem_phys_to_page_frame(K_MEM_BOOT_VIRT_TO_PHYS(addr)); frame_mapped_set(pf, addr); /* TODO: for now we pin the whole Zephyr image. Demand paging * currently tested with anonymously-mapped pages which are not * pinned. * * We will need to setup linker regions for a subset of kernel * code/data pages which are pinned in memory and * may not be evicted. This will contain critical CPU data * structures, and any code used to perform page fault * handling, page-ins, etc. */ k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); } #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */ #ifdef CONFIG_LINKER_USE_BOOT_SECTION /* Pin the boot section to prevent it from being swapped out during * boot process. Will be un-pinned once boot process completes. */ mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, true); #endif /* CONFIG_LINKER_USE_BOOT_SECTION */ #ifdef CONFIG_LINKER_USE_PINNED_SECTION /* Pin the page frames correspondng to the pinned symbols */ mark_linker_section_pinned(lnkr_pinned_start, lnkr_pinned_end, true); #endif /* CONFIG_LINKER_USE_PINNED_SECTION */ /* Any remaining pages that aren't mapped, reserved, or pinned get * added to the free pages list */ K_MEM_PAGE_FRAME_FOREACH(phys, pf) { if (k_mem_page_frame_is_available(pf)) { free_page_frame_list_put(pf); } } LOG_DBG("free page frames: %zu", z_free_page_count); #ifdef CONFIG_DEMAND_PAGING #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM z_paging_histogram_init(); #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ k_mem_paging_backing_store_init(); k_mem_paging_eviction_init(); /* start tracking evictable page installed above if any */ K_MEM_PAGE_FRAME_FOREACH(phys, pf) { if (k_mem_page_frame_is_evictable(pf)) { k_mem_paging_eviction_add(pf); } } #endif /* CONFIG_DEMAND_PAGING */ #ifdef CONFIG_LINKER_USE_ONDEMAND_SECTION z_paging_ondemand_section_map(); #endif #if __ASSERT_ON page_frames_initialized = true; #endif k_spin_unlock(&z_mm_lock, key); #ifndef CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT /* If BSS section is not present in memory at boot, * it would not have been cleared. This needs to be * done now since paging mechanism has been initialized * and the BSS pages can be brought into physical * memory to be cleared. */ z_bss_zero(); #endif /* CONFIG_LINKER_GENERIC_SECTIONS_PRESENT_AT_BOOT */ } void z_mem_manage_boot_finish(void) { #ifdef CONFIG_LINKER_USE_BOOT_SECTION /* At the end of boot process, unpin the boot sections * as they don't need to be in memory all the time anymore. 
*/ mark_linker_section_pinned(lnkr_boot_start, lnkr_boot_end, false); #endif /* CONFIG_LINKER_USE_BOOT_SECTION */ } #ifdef CONFIG_DEMAND_PAGING #ifdef CONFIG_DEMAND_PAGING_STATS struct k_mem_paging_stats_t paging_stats; extern struct k_mem_paging_histogram_t z_paging_histogram_eviction; extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_in; extern struct k_mem_paging_histogram_t z_paging_histogram_backing_store_page_out; #endif /* CONFIG_DEMAND_PAGING_STATS */ static inline void do_backing_store_page_in(uintptr_t location) { #ifdef CONFIG_DEMAND_MAPPING /* Check for special cases */ switch (location) { case ARCH_UNPAGED_ANON_ZERO: memset(K_MEM_SCRATCH_PAGE, 0, CONFIG_MMU_PAGE_SIZE); __fallthrough; case ARCH_UNPAGED_ANON_UNINIT: /* nothing else to do */ return; default: break; } #endif /* CONFIG_DEMAND_MAPPING */ #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM uint32_t time_diff; #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS timing_t time_start, time_end; time_start = timing_counter_get(); #else uint32_t time_start; time_start = k_cycle_get_32(); #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ k_mem_paging_backing_store_page_in(location); #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS time_end = timing_counter_get(); time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end); #else time_diff = k_cycle_get_32() - time_start; #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ z_paging_histogram_inc(&z_paging_histogram_backing_store_page_in, time_diff); #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ } static inline void do_backing_store_page_out(uintptr_t location) { #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM uint32_t time_diff; #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS timing_t time_start, time_end; time_start = timing_counter_get(); #else uint32_t time_start; time_start = k_cycle_get_32(); #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ k_mem_paging_backing_store_page_out(location); #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS time_end = timing_counter_get(); time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end); #else time_diff = k_cycle_get_32() - time_start; #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ z_paging_histogram_inc(&z_paging_histogram_backing_store_page_out, time_diff); #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ } #if defined(CONFIG_SMP) && defined(CONFIG_DEMAND_PAGING_ALLOW_IRQ) /* * SMP support is very simple. Some resources such as the scratch page could * be made per CPU, backing store driver execution be confined to the faulting * CPU, statistics be made to cope with access concurrency, etc. But in the * end we're dealing with memory transfer to/from some external storage which * is inherently slow and whose access is most likely serialized anyway. * So let's simply enforce global demand paging serialization across all CPUs * with a mutex as there is no real gain from added parallelism here. */ static K_MUTEX_DEFINE(z_mm_paging_lock); #endif static void virt_region_foreach(void *addr, size_t size, void (*func)(void *)) { k_mem_assert_virtual_region(addr, size); for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) { func((uint8_t *)addr + offset); } } /* * Perform some preparatory steps before paging out. 
The provided page frame * must be evicted to the backing store immediately after this is called * with a call to k_mem_paging_backing_store_page_out() if it contains * a data page. * * - Map page frame to scratch area if requested. This always is true if we're * doing a page fault, but is only set on manual evictions if the page is * dirty. * - If mapped: * - obtain backing store location and populate location parameter * - Update page tables with location * - Mark page frame as busy * * Returns -ENOMEM if the backing store is full */ static int page_frame_prepare_locked(struct k_mem_page_frame *pf, bool *dirty_ptr, bool page_fault, uintptr_t *location_ptr) { uintptr_t phys; int ret; bool dirty = *dirty_ptr; phys = k_mem_page_frame_to_phys(pf); __ASSERT(!k_mem_page_frame_is_pinned(pf), "page frame 0x%lx is pinned", phys); /* If the backing store doesn't have a copy of the page, even if it * wasn't modified, treat as dirty. This can happen for a few * reasons: * 1) Page has never been swapped out before, and the backing store * wasn't pre-populated with this data page. * 2) Page was swapped out before, but the page contents were not * preserved after swapping back in. * 3) Page contents were preserved when swapped back in, but were later * evicted from the backing store to make room for other evicted * pages. */ if (k_mem_page_frame_is_mapped(pf)) { dirty = dirty || !k_mem_page_frame_is_backed(pf); } if (dirty || page_fault) { arch_mem_scratch(phys); } if (k_mem_page_frame_is_mapped(pf)) { ret = k_mem_paging_backing_store_location_get(pf, location_ptr, page_fault); if (ret != 0) { LOG_ERR("out of backing store memory"); return -ENOMEM; } arch_mem_page_out(k_mem_page_frame_to_virt(pf), *location_ptr); k_mem_paging_eviction_remove(pf); } else { /* Shouldn't happen unless this function is mis-used */ __ASSERT(!dirty, "un-mapped page determined to be dirty"); } #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ /* Mark as busy so that k_mem_page_frame_is_evictable() returns false */ __ASSERT(!k_mem_page_frame_is_busy(pf), "page frame 0x%lx is already busy", phys); k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_BUSY); #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ /* Update dirty parameter, since we set to true if it wasn't backed * even if otherwise clean */ *dirty_ptr = dirty; return 0; } static int do_mem_evict(void *addr) { bool dirty; struct k_mem_page_frame *pf; uintptr_t location; k_spinlock_key_t key; uintptr_t flags, phys; int ret; #if CONFIG_DEMAND_PAGING_ALLOW_IRQ __ASSERT(!k_is_in_isr(), "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ", __func__); #ifdef CONFIG_SMP k_mutex_lock(&z_mm_paging_lock, K_FOREVER); #else k_sched_lock(); #endif #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ key = k_spin_lock(&z_mm_lock); flags = arch_page_info_get(addr, &phys, false); __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0, "address %p isn't mapped", addr); if ((flags & ARCH_DATA_PAGE_LOADED) == 0) { /* Un-mapped or already evicted. 
		 * Nothing to do
		 */
		ret = 0;
		goto out;
	}
	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	pf = k_mem_phys_to_page_frame(phys);
	__ASSERT(k_mem_page_frame_to_virt(pf) == addr, "page frame address mismatch");
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}
	__ASSERT(ret == 0, "failed to prepare page frame");

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_spin_unlock(&z_mm_lock, key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */

	if (dirty) {
		do_backing_store_page_out(location);
	}

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = k_spin_lock(&z_mm_lock);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	k_spin_unlock(&z_mm_lock, key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
#ifdef CONFIG_SMP
	k_mutex_unlock(&z_mm_paging_lock);
#else
	k_sched_unlock();
#endif
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

int k_mem_page_out(void *addr, size_t size)
{
	__ASSERT(page_frames_initialized, "%s called on %p too early", __func__,
		 addr);
	k_mem_assert_virtual_region(addr, size);

	for (size_t offset = 0; offset < size; offset += CONFIG_MMU_PAGE_SIZE) {
		void *pos = (uint8_t *)addr + offset;
		int ret;

		ret = do_mem_evict(pos);
		if (ret != 0) {
			return ret;
		}
	}

	return 0;
}

int k_mem_page_frame_evict(uintptr_t phys)
{
	k_spinlock_key_t key;
	struct k_mem_page_frame *pf;
	bool dirty;
	uintptr_t flags;
	uintptr_t location;
	int ret;

	__ASSERT(page_frames_initialized, "%s called on 0x%lx too early",
		 __func__, phys);

	/* Implementation is similar to do_page_fault() except there is no
	 * data page to page-in, see comments in that function.
	 */

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	__ASSERT(!k_is_in_isr(),
		 "%s is unavailable in ISRs with CONFIG_DEMAND_PAGING_ALLOW_IRQ",
		 __func__);
#ifdef CONFIG_SMP
	k_mutex_lock(&z_mm_paging_lock, K_FOREVER);
#else
	k_sched_lock();
#endif
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	key = k_spin_lock(&z_mm_lock);
	pf = k_mem_phys_to_page_frame(phys);
	if (!k_mem_page_frame_is_mapped(pf)) {
		/* Nothing to do, free page */
		ret = 0;
		goto out;
	}
	flags = arch_page_info_get(k_mem_page_frame_to_virt(pf), NULL, false);
	/* Shouldn't ever happen */
	__ASSERT((flags & ARCH_DATA_PAGE_LOADED) != 0, "data page not loaded");
	dirty = (flags & ARCH_DATA_PAGE_DIRTY) != 0;
	ret = page_frame_prepare_locked(pf, &dirty, false, &location);
	if (ret != 0) {
		goto out;
	}

#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	k_spin_unlock(&z_mm_lock, key);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	if (dirty) {
		do_backing_store_page_out(location);
	}
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	key = k_spin_lock(&z_mm_lock);
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	page_frame_free_locked(pf);
out:
	k_spin_unlock(&z_mm_lock, key);
#ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ
#ifdef CONFIG_SMP
	k_mutex_unlock(&z_mm_paging_lock);
#else
	k_sched_unlock();
#endif
#endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */
	return ret;
}

static inline void paging_stats_faults_inc(struct k_thread *faulting_thread,
					   int key)
{
#ifdef CONFIG_DEMAND_PAGING_STATS
	bool is_irq_unlocked = arch_irq_unlocked(key);

	paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		paging_stats.pagefaults.irq_unlocked++;
	} else {
		paging_stats.pagefaults.irq_locked++;
	}

#ifdef CONFIG_DEMAND_PAGING_THREAD_STATS
	faulting_thread->paging_stats.pagefaults.cnt++;

	if (is_irq_unlocked) {
		faulting_thread->paging_stats.pagefaults.irq_unlocked++;
	} else {
		faulting_thread->paging_stats.pagefaults.irq_locked++;
	}
#else
	ARG_UNUSED(faulting_thread);
#endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */

#ifndef CONFIG_DEMAND_PAGING_ALLOW_IRQ
	if (k_is_in_isr()) {
paging_stats.pagefaults.in_isr++; #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS faulting_thread->paging_stats.pagefaults.in_isr++; #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ } #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ #endif /* CONFIG_DEMAND_PAGING_STATS */ } static inline void paging_stats_eviction_inc(struct k_thread *faulting_thread, bool dirty) { #ifdef CONFIG_DEMAND_PAGING_STATS if (dirty) { paging_stats.eviction.dirty++; } else { paging_stats.eviction.clean++; } #ifdef CONFIG_DEMAND_PAGING_THREAD_STATS if (dirty) { faulting_thread->paging_stats.eviction.dirty++; } else { faulting_thread->paging_stats.eviction.clean++; } #else ARG_UNUSED(faulting_thread); #endif /* CONFIG_DEMAND_PAGING_THREAD_STATS */ #endif /* CONFIG_DEMAND_PAGING_STATS */ } static inline struct k_mem_page_frame *do_eviction_select(bool *dirty) { struct k_mem_page_frame *pf; #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM uint32_t time_diff; #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS timing_t time_start, time_end; time_start = timing_counter_get(); #else uint32_t time_start; time_start = k_cycle_get_32(); #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ pf = k_mem_paging_eviction_select(dirty); #ifdef CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM #ifdef CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS time_end = timing_counter_get(); time_diff = (uint32_t)timing_cycles_get(&time_start, &time_end); #else time_diff = k_cycle_get_32() - time_start; #endif /* CONFIG_DEMAND_PAGING_STATS_USING_TIMING_FUNCTIONS */ z_paging_histogram_inc(&z_paging_histogram_eviction, time_diff); #endif /* CONFIG_DEMAND_PAGING_TIMING_HISTOGRAM */ return pf; } static bool do_page_fault(void *addr, bool pin) { struct k_mem_page_frame *pf; k_spinlock_key_t key; uintptr_t page_in_location, page_out_location; enum arch_page_location status; bool result; bool dirty = false; struct k_thread *faulting_thread; int ret; __ASSERT(page_frames_initialized, "page fault at %p happened too early", addr); LOG_DBG("page fault at %p", addr); /* * TODO: Add performance accounting: * - k_mem_paging_eviction_select() metrics * * periodic timer execution time histogram (if implemented) */ #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ /* * We do re-enable interrupts during the page-in/page-out operation * if and only if interrupts were enabled when the exception was * taken; in this configuration page faults in an ISR are a bug; all * their code/data must be pinned. * * If interrupts were disabled when the exception was taken, the * arch code is responsible for keeping them that way when entering * this function. * * If this is not enabled, then interrupts are always locked for the * entire operation. This is far worse for system interrupt latency * but requires less pinned pages and ISRs may also take page faults. * * On UP we lock the scheduler so that other threads are never * scheduled during the page-in/out operation. Support for * allowing k_mem_paging_backing_store_page_out() and * k_mem_paging_backing_store_page_in() to also sleep and allow * other threads to run (such as in the case where the transfer is * async DMA) is not supported on UP. Even if limited to thread * context, arbitrary memory access triggering exceptions that put * a thread to sleep on a contended page fault operation will break * scheduling assumptions of cooperative threads or threads that * implement critical sections with spinlocks or disabling IRQs. 
* * On SMP, though, exclusivity cannot be assumed solely from being * a cooperative thread. Another thread with any prio may be running * on another CPU so exclusion must already be enforced by other * means. Therefore trying to prevent scheduling on SMP is pointless, * and k_sched_lock() is equivalent to a no-op on SMP anyway. * As a result, sleeping/rescheduling in the SMP case is fine. */ __ASSERT(!k_is_in_isr(), "ISR page faults are forbidden"); #ifdef CONFIG_SMP k_mutex_lock(&z_mm_paging_lock, K_FOREVER); #else k_sched_lock(); #endif #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ key = k_spin_lock(&z_mm_lock); faulting_thread = _current_cpu->current; status = arch_page_location_get(addr, &page_in_location); if (status == ARCH_PAGE_LOCATION_BAD) { /* Return false to treat as a fatal error */ result = false; goto out; } result = true; if (status == ARCH_PAGE_LOCATION_PAGED_IN) { if (pin) { /* It's a physical memory address */ uintptr_t phys = page_in_location; pf = k_mem_phys_to_page_frame(phys); if (!k_mem_page_frame_is_pinned(pf)) { k_mem_paging_eviction_remove(pf); k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); } } /* This if-block is to pin the page if it is * already present in physical memory. There is * no need to go through the following code to * pull in the data pages. So skip to the end. */ goto out; } __ASSERT(status == ARCH_PAGE_LOCATION_PAGED_OUT, "unexpected status value %d", status); paging_stats_faults_inc(faulting_thread, key.key); pf = free_page_frame_list_get(); if (pf == NULL) { /* Need to evict a page frame */ pf = do_eviction_select(&dirty); __ASSERT(pf != NULL, "failed to get a page frame"); LOG_DBG("evicting %p at 0x%lx", k_mem_page_frame_to_virt(pf), k_mem_page_frame_to_phys(pf)); paging_stats_eviction_inc(faulting_thread, dirty); } ret = page_frame_prepare_locked(pf, &dirty, true, &page_out_location); __ASSERT(ret == 0, "failed to prepare page frame"); #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ k_spin_unlock(&z_mm_lock, key); /* Interrupts are now unlocked if they were not locked when we entered * this function, and we may service ISRs. The scheduler is still * locked. 
*/ #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ if (dirty) { do_backing_store_page_out(page_out_location); } do_backing_store_page_in(page_in_location); #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ key = k_spin_lock(&z_mm_lock); k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_BUSY); #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_MAPPED); frame_mapped_set(pf, addr); if (pin) { k_mem_page_frame_set(pf, K_MEM_PAGE_FRAME_PINNED); } arch_mem_page_in(addr, k_mem_page_frame_to_phys(pf)); k_mem_paging_backing_store_page_finalize(pf, page_in_location); if (!pin) { k_mem_paging_eviction_add(pf); } out: k_spin_unlock(&z_mm_lock, key); #ifdef CONFIG_DEMAND_PAGING_ALLOW_IRQ #ifdef CONFIG_SMP k_mutex_unlock(&z_mm_paging_lock); #else k_sched_unlock(); #endif #endif /* CONFIG_DEMAND_PAGING_ALLOW_IRQ */ return result; } static void do_page_in(void *addr) { bool ret; ret = do_page_fault(addr, false); __ASSERT(ret, "unmapped memory address %p", addr); (void)ret; } void k_mem_page_in(void *addr, size_t size) { __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(), "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled", __func__); virt_region_foreach(addr, size, do_page_in); } static void do_mem_pin(void *addr) { bool ret; ret = do_page_fault(addr, true); __ASSERT(ret, "unmapped memory address %p", addr); (void)ret; } void k_mem_pin(void *addr, size_t size) { __ASSERT(!IS_ENABLED(CONFIG_DEMAND_PAGING_ALLOW_IRQ) || !k_is_in_isr(), "%s may not be called in ISRs if CONFIG_DEMAND_PAGING_ALLOW_IRQ is enabled", __func__); virt_region_foreach(addr, size, do_mem_pin); } bool k_mem_page_fault(void *addr) { return do_page_fault(addr, false); } static void do_mem_unpin(void *addr) { struct k_mem_page_frame *pf; k_spinlock_key_t key; uintptr_t flags, phys; key = k_spin_lock(&z_mm_lock); flags = arch_page_info_get(addr, &phys, false); __ASSERT((flags & ARCH_DATA_PAGE_NOT_MAPPED) == 0, "invalid data page at %p", addr); if ((flags & ARCH_DATA_PAGE_LOADED) != 0) { pf = k_mem_phys_to_page_frame(phys); if (k_mem_page_frame_is_pinned(pf)) { k_mem_page_frame_clear(pf, K_MEM_PAGE_FRAME_PINNED); k_mem_paging_eviction_add(pf); } } k_spin_unlock(&z_mm_lock, key); } void k_mem_unpin(void *addr, size_t size) { __ASSERT(page_frames_initialized, "%s called on %p too early", __func__, addr); virt_region_foreach(addr, size, do_mem_unpin); } #endif /* CONFIG_DEMAND_PAGING */
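
/*
 * Illustrative usage sketch (documentation only, not part of this file's
 * implementation): how the anonymous mapping path built on
 * k_mem_map_phys_guard() / k_mem_unmap_phys_guard() above is typically
 * consumed through the public k_mem_map()/k_mem_unmap() wrappers. The header
 * path and flag names (K_MEM_PERM_RW, K_MEM_MAP_LOCK) are taken from the
 * public kernel mm API; the buffer size and access pattern are assumptions
 * made for the example only.
 *
 *	#include <string.h>
 *	#include <zephyr/kernel.h>
 *	#include <zephyr/kernel/mm.h>
 *
 *	void anon_mapping_example(void)
 *	{
 *		// Request two pages of zeroed, kernel-RW anonymous memory.
 *		// The returned region is surrounded by unmapped guard pages,
 *		// so accesses just before or after it fault immediately.
 *		void *buf = k_mem_map(2 * CONFIG_MMU_PAGE_SIZE, K_MEM_PERM_RW);
 *
 *		if (buf == NULL) {
 *			// Out of page frames or virtual address space.
 *			return;
 *		}
 *
 *		// With CONFIG_DEMAND_PAGING these pages may be evicted to the
 *		// backing store on memory pressure; passing K_MEM_MAP_LOCK
 *		// instead would pin the page frames.
 *		memset(buf, 0xAA, 2 * CONFIG_MMU_PAGE_SIZE);
 *
 *		// Returns the page frames to the free list and releases the
 *		// virtual region, including both guard pages.
 *		k_mem_unmap(buf, 2 * CONFIG_MMU_PAGE_SIZE);
 *	}
 */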