/*-
 * Copyright (c) 2018 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <linux/memfd.h>

#include "vmmapi.h"
#include "dm.h"
#include "log.h"

extern char *vmname;

#define ALIGN_CHECK(x, align) (((x) & ((align)-1)) ? 1 : 0)

#define HUGETLB_LV1 0
#define HUGETLB_LV2 1
#define HUGETLB_LV_MAX 2

#define MAX_PATH_LEN 256

/* HugePage Level 1 is the 2M page, Level 2 is the 1G page */
#define SYS_PATH_LV1 "/sys/kernel/mm/hugepages/hugepages-2048kB/"
#define SYS_PATH_LV2 "/sys/kernel/mm/hugepages/hugepages-1048576kB/"
#define SYS_NR_HUGEPAGES "nr_hugepages"
#define SYS_FREE_HUGEPAGES "free_hugepages"
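
/* Illustration only (example values): the sysfs nodes above are the same
 * ones that reserve_more_pages()/release_larger_freepage() drive through
 * popen() below. Reserving eight 2M pages by hand and checking how many
 * are free would look like:
 *
 *   echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
 *   cat /sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages
 */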

/* File used as a lock between different processes accessing hugetlbfs.
 * We observed that allocating huge pages from hugetlbfs in different
 * processes at the same time could fail, so a file lock is used here to
 * make sure hugetlbfs is accessed sequentially.
 *
 * The file byte range (0..9) is used for the hugetlbfs access lock.
 */
#define ACRN_HUGETLB_LOCK_DIR "/run/hugepage/acrn"
#define ACRN_HUGETLB_LOCK_FILE "/run/hugepage/acrn/lock"
#define LOCK_OFFSET_START 0
#define LOCK_OFFSET_END 10

/* hugetlb_info records private information for one huge-page size:
 * - fd: memfd backing the guest memory of this huge-page size
 * - pg_size: huge-page size of this level
 * - lowmem/fbmem/biosmem/highmem: size of each region to be allocated
 *   from this level
 * - flags: flags passed to memfd_create() for this level
 * - pages_delta: needed pages minus free pages; if > 0 it is the gap of
 *   pages still to be reserved, if < 0 more pages are free than needed
 * - nr_pages_path: sysfs path for the total number of pages
 * - free_pages_path: sysfs path for the number of free pages
 */
struct hugetlb_info {
	int fd;
	int pg_size;
	size_t lowmem;
	size_t fbmem;
	size_t biosmem;
	size_t highmem;

	unsigned int flags;
	int pages_delta;
	char *nr_pages_path;
	char *free_pages_path;
};

static struct hugetlb_info hugetlb_priv[HUGETLB_LV_MAX] = {
	{
		.fd = -1,
		.pg_size = 2048 * 1024,
		.lowmem = 0,
		.fbmem = 0,
		.biosmem = 0,
		.highmem = 0,
		.flags = MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_HUGE_2MB,
		.pages_delta = 0,
		.nr_pages_path = SYS_PATH_LV1 SYS_NR_HUGEPAGES,
		.free_pages_path = SYS_PATH_LV1 SYS_FREE_HUGEPAGES,
	},
	{
		.fd = -1,
		.pg_size = 1024 * 1024 * 1024,
		.lowmem = 0,
		.fbmem = 0,
		.biosmem = 0,
		.highmem = 0,
		.flags = MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB | MFD_HUGE_1GB,
		.pages_delta = 0,
		.nr_pages_path = SYS_PATH_LV2 SYS_NR_HUGEPAGES,
		.free_pages_path = SYS_PATH_LV2 SYS_FREE_HUGEPAGES,
	},
};

struct vm_mmap_mem_region {
	vm_paddr_t gpa_start;
	vm_paddr_t gpa_end;
	vm_paddr_t fd_offset;
	char *hva_base;
	int fd;
};

static struct vm_mmap_mem_region mmap_mem_regions[16];
static int mem_idx;

static void *ptr;
static size_t total_size;
static int hugetlb_lv_max;

static int lock_fd;

static int lock_acrn_hugetlb(void)
{
	int ret;

	ret = lockf(lock_fd, F_LOCK, LOCK_OFFSET_END);
	if (ret < 0) {
		pr_err("lock acrn hugetlb failed with errno: %d\n", errno);
		return ret;
	}

	return 0;
}

static int unlock_acrn_hugetlb(void)
{
	int ret;

	ret = lockf(lock_fd, F_ULOCK, LOCK_OFFSET_END);
	if (ret < 0) {
		pr_err("unlock acrn hugetlb failed with errno: %d\n", errno);
		return ret;
	}

	return 0;
}

static void close_hugetlbfs(int level)
{
	if (level >= HUGETLB_LV_MAX) {
		pr_err("exceed max hugetlb level");
		return;
	}

	if (hugetlb_priv[level].fd >= 0) {
		close(hugetlb_priv[level].fd);
		hugetlb_priv[level].fd = -1;
	}
}

static bool should_enable_hugetlb_level(int level)
{
	if (level >= HUGETLB_LV_MAX) {
		pr_err("exceed max hugetlb level");
		return false;
	}

	if (hugetlb_priv[level].fd < 0)
		return false;

	return (hugetlb_priv[level].lowmem > 0 ||
		hugetlb_priv[level].fbmem > 0 ||
		hugetlb_priv[level].biosmem > 0 ||
		hugetlb_priv[level].highmem > 0);
}

/*
 * level  : hugepage level
 * len    : region length for mmap
 * offset : region start offset from ctx->baseaddr
 * skip   : offset to skip into this level's backing fd
 */
static int mmap_hugetlbfs_from_level(struct vmctx *ctx, int level, size_t len,
		size_t offset, size_t skip, char **addr_out)
{
	char *addr;
	size_t pagesz = 0;
	int fd, i;

	if (level >= HUGETLB_LV_MAX) {
		pr_err("exceed max hugetlb level");
		return -EINVAL;
	}

	if (mem_idx >= ARRAY_SIZE(mmap_mem_regions)) {
		pr_err("exceed supported regions.\n");
		return -EFAULT;
	}

	fd = hugetlb_priv[level].fd;
	addr = mmap(ctx->baseaddr + offset, len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, skip);
	if (addr == MAP_FAILED)
		return -ENOMEM;

	if (addr_out)
		*addr_out = addr;

	/* add the mapping into mmap_mem_regions */
	mmap_mem_regions[mem_idx].gpa_start = offset;
	mmap_mem_regions[mem_idx].gpa_end = offset + len;
	mmap_mem_regions[mem_idx].fd = fd;
	mmap_mem_regions[mem_idx].fd_offset = skip;
	mmap_mem_regions[mem_idx].hva_base = addr;
	mem_idx++;

	pr_info("mmap 0x%lx@%p\n", len, addr);

	/* pre-allocate the huge pages by touching them */
	pagesz = hugetlb_priv[level].pg_size;

	pr_info("touch %ld pages with pagesz 0x%lx\n", len / pagesz, pagesz);

	for (i = 0; i < len / pagesz; i++) {
		*(volatile char *)addr = *addr;
		addr += pagesz;
	}

	return 0;
}
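
/* Summary of the loop below: each region is mapped from the largest enabled
 * huge-page level down to the 2M level.  When a MAP_FIXED mmap() at the
 * current level fails, adj_param() moves one huge page worth of the region
 * (for example one 1G page, i.e. 512 2M pages) from this level to the next
 * lower level and the shrunken mapping is retried; a failure at the 2M
 * level is fatal for the whole region.
 */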
static int mmap_hugetlbfs(struct vmctx *ctx, size_t offset,
		void (*get_param)(struct hugetlb_info *, size_t *, size_t *),
		size_t (*adj_param)(struct hugetlb_info *, struct hugetlb_info *, int),
		char **addr)
{
	size_t len, skip;
	int level, ret = 0, pg_size;

	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		get_param(&hugetlb_priv[level], &len, &skip);
		pg_size = hugetlb_priv[level].pg_size;

		while (len > 0) {
			ret = mmap_hugetlbfs_from_level(ctx, level, len,
					offset, skip, addr);

			if (ret < 0 && level > HUGETLB_LV1) {
				len = adj_param(&hugetlb_priv[level],
						&hugetlb_priv[level - 1], pg_size);
			} else if (ret < 0 && level == HUGETLB_LV1) {
				goto done;
			} else {
				offset += len;
				break;
			}
		}
	}

done:
	return ret;
}

static void get_lowmem_param(struct hugetlb_info *htlb,
		size_t *len, size_t *skip)
{
	*len = htlb->lowmem;
	*skip = 0; /* nothing to skip as lowmem is mmap'ed first */
}

static size_t adj_lowmem_param(struct hugetlb_info *htlb,
		struct hugetlb_info *htlb_prev, int adj_size)
{
	htlb->lowmem -= adj_size;
	htlb_prev->lowmem += adj_size;

	return htlb->lowmem;
}

static void get_highmem_param(struct hugetlb_info *htlb,
		size_t *len, size_t *skip)
{
	*len = htlb->highmem;
	*skip = htlb->lowmem;
}

static size_t adj_highmem_param(struct hugetlb_info *htlb,
		struct hugetlb_info *htlb_prev, int adj_size)
{
	htlb->highmem -= adj_size;
	htlb_prev->highmem += adj_size;

	return htlb->highmem;
}

static void get_biosmem_param(struct hugetlb_info *htlb,
		size_t *len, size_t *skip)
{
	*len = htlb->biosmem;
	*skip = htlb->lowmem + htlb->highmem;
}

static size_t adj_biosmem_param(struct hugetlb_info *htlb,
		struct hugetlb_info *htlb_prev, int adj_size)
{
	htlb->biosmem -= adj_size;
	htlb_prev->biosmem += adj_size;

	return htlb->biosmem;
}

#define FB_SIZE (16 * MB)
static void get_fbmem_param(struct hugetlb_info *htlb,
		size_t *len, size_t *skip)
{
	if (htlb == &hugetlb_priv[0]) {
		*len = FB_SIZE;
		*skip = htlb->lowmem + htlb->highmem + htlb->biosmem;
	} else {
		*len = 0;
		*skip = htlb->lowmem + htlb->highmem + htlb->biosmem;
	}
}

static size_t adj_fbmem_param(struct hugetlb_info *htlb,
		struct hugetlb_info *htlb_prev, int adj_size)
{
	htlb->fbmem -= adj_size;
	htlb_prev->fbmem += adj_size;

	return htlb->fbmem;
}

static int read_sys_info(const char *sys_path)
{
	FILE *fp;
	char tmp_buf[12];
	int pages = 0;
	int result;

	fp = fopen(sys_path, "r");
	if (fp == NULL) {
		pr_err("can't open: %s, err: %s\n", sys_path, strerror(errno));
		return 0;
	}

	memset(tmp_buf, 0, 12);
	result = fread(&tmp_buf, sizeof(char), 8, fp);
	if (result <= 0)
		pr_err("read %s, error: %s, please check!\n",
			sys_path, strerror(errno));
	else
		pages = strtol(tmp_buf, NULL, 10);

	fclose(fp);
	return pages;
}

/* check if there are enough free huge pages for the User VM */
static bool hugetlb_check_memgap(void)
{
	int lvl, free_pages, need_pages;
	bool has_gap = false;

	for (lvl = HUGETLB_LV1; lvl < hugetlb_lv_max; lvl++) {
		if (hugetlb_priv[lvl].fd < 0) {
			hugetlb_priv[lvl].pages_delta = 0;
			continue;
		}
		free_pages = read_sys_info(hugetlb_priv[lvl].free_pages_path);
		need_pages = (hugetlb_priv[lvl].lowmem + hugetlb_priv[lvl].fbmem +
			hugetlb_priv[lvl].biosmem + hugetlb_priv[lvl].highmem) /
			hugetlb_priv[lvl].pg_size;

		hugetlb_priv[lvl].pages_delta = need_pages - free_pages;
		/* if delta > 0, it's a gap of needed pages, to be handled */
		if (hugetlb_priv[lvl].pages_delta > 0)
			has_gap = true;

		pr_info("level %d free/need pages:%d/%d page size:0x%x\n", lvl,
			free_pages, need_pages, hugetlb_priv[lvl].pg_size);
	}

	return has_gap;
}
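
/* Worked example (illustrative numbers only): a User VM that puts 4G of
 * highmem on the 1G level needs need_pages = 4 on that level.  If
 * free_hugepages reports 2, pages_delta becomes 4 - 2 = 2 and
 * hugetlb_reserve_pages() will then try to raise nr_hugepages by 2;
 * a negative pages_delta means that level already has surplus free pages.
 */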
/* try to reserve more huge pages on the given level */
static void reserve_more_pages(int level)
{
	int total_pages, orig_pages, cur_pages;
	char cmd_buf[MAX_PATH_LEN];
	FILE *fp;

	orig_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);
	total_pages = orig_pages + hugetlb_priv[level].pages_delta;
	snprintf(cmd_buf, MAX_PATH_LEN, "echo %d > %s",
		total_pages, hugetlb_priv[level].nr_pages_path);

	/* system cmd to reserve the needed huge pages */
	fp = popen(cmd_buf, "r");
	if (fp == NULL) {
		pr_err("cmd: %s failed!\n", cmd_buf);
		return;
	}
	pclose(fp);

	pr_info("to reserve pages (+orig %d): %s\n", orig_pages, cmd_buf);
	cur_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);
	hugetlb_priv[level].pages_delta = total_pages - cur_pages;
}

/* try to release a larger free page */
static bool release_larger_freepage(int level_limit)
{
	int level;
	int total_pages, orig_pages, cur_pages;
	char cmd_buf[MAX_PATH_LEN];
	FILE *fp;

	for (level = hugetlb_lv_max - 1; level >= level_limit; level--) {
		if (hugetlb_priv[level].pages_delta >= 0)
			continue;

		/* free one unused larger page */
		orig_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);
		total_pages = orig_pages - 1;
		snprintf(cmd_buf, MAX_PATH_LEN, "echo %d > %s",
			total_pages, hugetlb_priv[level].nr_pages_path);

		fp = popen(cmd_buf, "r");
		if (fp == NULL) {
			pr_err("cmd to free mem: %s failed!\n", cmd_buf);
			return false;
		}
		pclose(fp);

		cur_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);

		/* page was released successfully */
		if (cur_pages < orig_pages) {
			hugetlb_priv[level].pages_delta++;
			break;
		}
	}

	if (level < level_limit)
		return false;

	return true;
}

/* Reserve more free huge pages across the different levels.
 * It needs to handle the following cases:
 * A. not enough free memory to reserve the gap pages, so it simply fails.
 * B. enough free memory to reserve each level's gap pages.
 * C. enough free memory, but not enough higher-level gap pages can be
 *    reserved, so the lower level has to reserve more pages to cover that.
 * D. enough higher-level free pages, but not enough free memory for the
 *    lower-level gap pages, so some higher-level free pages are released
 *    to make room.
 * Other notes:
 * Even with enough free memory, it is easier to reserve smaller pages than
 * larger ones, for example 2MB is easier than 1GB. One flaw of the current
 * solution is that it can leave the Service VM with very little free memory.
 * Return value: true on success, false on failure.
 */
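/* Worked example for case C (illustrative numbers only): if the 1G level
 * still has pages_delta = 1 after reserve_more_pages(), that remaining gap
 * is pushed down to the 2M level as 1 * (1G / 2M) = 512 extra 2M pages,
 * which the next loop iteration then tries to reserve.
 */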
static bool hugetlb_reserve_pages(void)
{
	int left_gap = 0, pg_size;
	int level;

	pr_info("to reserve more free pages:\n");
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (hugetlb_priv[level].pages_delta <= 0)
			continue;

		/* if there is a gap, try to reserve more pages */
		reserve_more_pages(level);

		/* check if enough pages were reserved */
		if (hugetlb_priv[level].pages_delta <= 0)
			continue;

		/* The system probably allocated fewer pages than needed,
		 * especially for larger pages like 1GB: even with enough
		 * free memory, allocating a 1GB huge page can still fail.
		 * In that case the next lower level has to handle the
		 * remaining gap.
		 */
		if (level > HUGETLB_LV1) {
			left_gap = hugetlb_priv[level].pages_delta;
			pg_size = hugetlb_priv[level].pg_size;
			hugetlb_priv[level - 1].pages_delta += (size_t)left_gap
				* (pg_size / hugetlb_priv[level - 1].pg_size);
			continue;
		}

		/* Now, for level == HUGETLB_LV1, it still can't allocate
		 * enough pages; go back to the larger pages and check if
		 * some unused free pages can be released. If the release
		 * succeeds, let it try LV1 again.
		 */
		if (release_larger_freepage(level + 1))
			level++;
		else
			break;
	}

	if (level >= HUGETLB_LV1) {
		pr_err("level %d pages gap: %d failed to reserve!\n",
			level, left_gap);
		return false;
	}

	pr_info("now enough free pages are reserved!\n");
	return true;
}

bool init_hugetlb(void)
{
	char path[MAX_PATH_LEN] = {0};
	int i;
	int level;
	int fd;
	size_t len;

	/* Try to create the /run/hugepage/acrn directory */
	snprintf(path, MAX_PATH_LEN, "%s/", ACRN_HUGETLB_LOCK_DIR);
	len = strnlen(path, MAX_PATH_LEN);
	for (i = 1; i < len; i++) {
		if (path[i] == '/') {
			path[i] = 0;
			if (access(path, F_OK) != 0) {
				if (mkdir(path, 0755) < 0) {
					pr_err("mkdir %s failed.\n", path);
					return false;
				}
			}
			path[i] = '/';
		}
	}

	for (level = HUGETLB_LV1; level < HUGETLB_LV_MAX; level++) {
		hugetlb_priv[level].fd = -1;
		fd = memfd_create("acrn_memfd", hugetlb_priv[level].flags);
		if (fd == -1)
			break;

		hugetlb_priv[level].fd = fd;
	}
	if (level == HUGETLB_LV1) /* memfd_create failed for level 1 */
		return false;
	else if (level == HUGETLB_LV2) /* memfd_create failed for level 2 */
		pr_warn("WARNING: only level 1 hugetlb supported");

	lock_fd = open(ACRN_HUGETLB_LOCK_FILE, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (lock_fd < 0) {
		return false;
	}
	lseek(lock_fd, LOCK_OFFSET_START, SEEK_SET);

	hugetlb_lv_max = level;

	return true;
}

void uninit_hugetlb(void)
{
	int level;

	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		if (hugetlb_priv[level].fd > 0)
			close(hugetlb_priv[level].fd);

		hugetlb_priv[level].fd = -1;
	}
	close(lock_fd);
}
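
/* Overview of hugetlb_setup_memory() below: recreate the per-level memfds
 * if needed, check 2M alignment of every region, split lowmem/fbmem/
 * biosmem/highmem between the 1G and 2M levels, take the hugetlb file
 * lock, fill any huge-page gap through sysfs, reserve one large PROT_NONE
 * VMA and map each region into it with MAP_FIXED, then truncate and seal
 * the memfds and set up the EPT mappings via vm_map_memseg_vma().
 */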
int hugetlb_setup_memory(struct vmctx *ctx)
{
	int level;
	size_t lowmem, fbmem, biosmem, highmem;
	bool has_gap;
	int fd;
	unsigned int seal_flag = F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL;
	size_t mem_size_level;

	mem_idx = 0;
	memset(&mmap_mem_regions, 0, sizeof(mmap_mem_regions));
	if (ctx->lowmem == 0) {
		pr_err("vm requests 0 memory");
		goto err;
	}

	/* During the reboot sequence the memfd is closed, so it needs to be
	 * recreated here if it was closed.
	 */
	for (level = HUGETLB_LV1; level < HUGETLB_LV_MAX; level++) {
		if (hugetlb_priv[level].fd > 0)
			continue;

		fd = memfd_create("acrn_memfd", hugetlb_priv[level].flags);
		if (fd == -1) {
			pr_err("Fail to create memfd for %d.\n", level);
			break;
		}
		hugetlb_priv[level].fd = fd;
	}

	if (ALIGN_CHECK(ctx->lowmem, hugetlb_priv[HUGETLB_LV1].pg_size) ||
		ALIGN_CHECK(ctx->highmem, hugetlb_priv[HUGETLB_LV1].pg_size) ||
		ALIGN_CHECK(ctx->biosmem, hugetlb_priv[HUGETLB_LV1].pg_size) ||
		ALIGN_CHECK(ctx->fbmem, hugetlb_priv[HUGETLB_LV1].pg_size)) {
		pr_err("Memory size is not aligned to 2M.\n");
		goto err;
	}

	/* all memory should be at least aligned with
	 * hugetlb_priv[HUGETLB_LV1].pg_size
	 */
	ctx->lowmem = ALIGN_DOWN(ctx->lowmem, hugetlb_priv[HUGETLB_LV1].pg_size);
	ctx->fbmem = ALIGN_DOWN(ctx->fbmem, hugetlb_priv[HUGETLB_LV1].pg_size);
	ctx->biosmem = ALIGN_DOWN(ctx->biosmem, hugetlb_priv[HUGETLB_LV1].pg_size);
	ctx->highmem = ALIGN_DOWN(ctx->highmem, hugetlb_priv[HUGETLB_LV1].pg_size);

	total_size = ctx->highmem_gpa_base + ctx->highmem;

	/* check & set hugetlb level memory size for lowmem/biosmem/highmem */
	lowmem = ctx->lowmem;
	fbmem = ctx->fbmem;
	biosmem = ctx->biosmem;
	highmem = ctx->highmem;

	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (hugetlb_priv[level].fd < 0) {
			hugetlb_priv[level].lowmem = 0;
			hugetlb_priv[level].highmem = 0;
			hugetlb_priv[level].biosmem = 0;
			hugetlb_priv[level].fbmem = 0;
			continue;
		}

		hugetlb_priv[level].lowmem =
			ALIGN_DOWN(lowmem, hugetlb_priv[level].pg_size);
		hugetlb_priv[level].fbmem =
			ALIGN_DOWN(fbmem, hugetlb_priv[level].pg_size);
		hugetlb_priv[level].biosmem =
			ALIGN_DOWN(biosmem, hugetlb_priv[level].pg_size);
		hugetlb_priv[level].highmem =
			ALIGN_DOWN(highmem, hugetlb_priv[level].pg_size);

		if (level > HUGETLB_LV1) {
			hugetlb_priv[level-1].lowmem = lowmem =
				lowmem - hugetlb_priv[level].lowmem;
			hugetlb_priv[level-1].fbmem = fbmem =
				fbmem - hugetlb_priv[level].fbmem;
			hugetlb_priv[level-1].biosmem = biosmem =
				biosmem - hugetlb_priv[level].biosmem;
			hugetlb_priv[level-1].highmem = highmem =
				highmem - hugetlb_priv[level].highmem;
		}
	}

	lock_acrn_hugetlb();

	/* check each level's memory need */
	has_gap = hugetlb_check_memgap();
	if (has_gap) {
		if (!hugetlb_reserve_pages())
			goto err_lock;
	}
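
	/* Note on the next two steps: the anonymous PROT_NONE reservation is
	 * padded by one huge page of the largest enabled level so that
	 * ctx->baseaddr can be aligned up to that page size inside the
	 * reserved range; the later MAP_FIXED mappings of guest RAM then
	 * start on a huge-page-aligned host virtual address.
	 */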
	/* align up total size with huge page size for vma alignment */
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (should_enable_hugetlb_level(level)) {
			total_size += hugetlb_priv[level].pg_size;
			break;
		}
	}

	/* dump the hugepage allocation this setup will try */
	pr_info("\ntry to setup hugepage with:\n");
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		pr_info("\tlevel %d - lowmem 0x%lx, fbmem 0x%lx, biosmem 0x%lx, highmem 0x%lx\n",
			level,
			hugetlb_priv[level].lowmem,
			hugetlb_priv[level].fbmem,
			hugetlb_priv[level].biosmem,
			hugetlb_priv[level].highmem);
	}
	pr_info("total_size 0x%lx\n\n", total_size);

	/* basic overview vma */
	ptr = mmap(NULL, total_size, PROT_NONE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (ptr == MAP_FAILED) {
		pr_err("anonymous mmap failed");
		goto err_lock;
	}

	/* align up baseaddr according to hugepage level size */
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (should_enable_hugetlb_level(level)) {
			ctx->baseaddr = (void *)ALIGN_UP((size_t)ptr,
						hugetlb_priv[level].pg_size);
			break;
		}
	}
	pr_info("mmap ptr 0x%p -> baseaddr 0x%p\n", ptr, ctx->baseaddr);

	/* mmap lowmem */
	if (mmap_hugetlbfs(ctx, 0, get_lowmem_param, adj_lowmem_param,
			NULL) < 0) {
		pr_err("lowmem mmap failed");
		goto err_lock;
	}

	/* mmap highmem */
	if (mmap_hugetlbfs(ctx, ctx->highmem_gpa_base,
			get_highmem_param, adj_highmem_param, NULL) < 0) {
		pr_err("highmem mmap failed");
		goto err_lock;
	}

	/* mmap biosmem */
	if (mmap_hugetlbfs(ctx, 4 * GB - ctx->biosmem,
			get_biosmem_param, adj_biosmem_param, NULL) < 0) {
		pr_err("biosmem mmap failed");
		goto err_lock;
	}

	/* mmap fbmem */
	if (mmap_hugetlbfs(ctx, 4 * GB - ctx->biosmem - ctx->fbmem,
			get_fbmem_param, adj_fbmem_param,
			(char **)&ctx->fb_base) < 0) {
		pr_err("fbmem mmap failed");
		goto err_lock;
	}

	/* resize each memfd to meet the size requirement and add the
	 * F_SEAL_SEAL flag
	 */
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		if (hugetlb_priv[level].fd > 0) {
			mem_size_level = hugetlb_priv[level].lowmem +
				hugetlb_priv[level].highmem +
				hugetlb_priv[level].biosmem +
				hugetlb_priv[level].fbmem;

			if (ftruncate(hugetlb_priv[level].fd, mem_size_level) == -1) {
				pr_err("Fail to set mem_size for level %d.\n",
					level);
				goto err_lock;
			}
			if (fcntl(hugetlb_priv[level].fd, F_ADD_SEALS, seal_flag) == -1) {
				pr_err("Fail to set seal flag for level %d.\n",
					level);
				goto err_lock;
			}
		}
	}

	unlock_acrn_hugetlb();

	/* dump the hugepage allocation that was really set up */
	pr_info("\nreally setup hugepage with:\n");
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		pr_info("\tlevel %d - lowmem 0x%lx, biosmem 0x%lx, highmem 0x%lx\n",
			level,
			hugetlb_priv[level].lowmem,
			hugetlb_priv[level].biosmem,
			hugetlb_priv[level].highmem);
	}

	/* map ept for lowmem */
	if (vm_map_memseg_vma(ctx, ctx->lowmem, 0,
		(uint64_t)ctx->baseaddr, PROT_ALL) < 0)
		goto err;

	/* map ept for biosmem */
	if (ctx->biosmem > 0) {
		/*
		 * The High BIOS region can behave as RAM and be
		 * modified by the boot firmware itself (e.g. OVMF
		 * NV data storage region).
		 */
		if (vm_map_memseg_vma(ctx, ctx->biosmem, 4 * GB - ctx->biosmem,
			(uint64_t)(ctx->baseaddr + 4 * GB - ctx->biosmem),
			PROT_ALL) < 0)
			goto err;
	}

	/* map ept for highmem */
	if (ctx->highmem > 0) {
		if (vm_map_memseg_vma(ctx, ctx->highmem, ctx->highmem_gpa_base,
			(uint64_t)(ctx->baseaddr + ctx->highmem_gpa_base),
			PROT_ALL) < 0)
			goto err;
	}

	return 0;

err_lock:
	unlock_acrn_hugetlb();
err:
	if (ptr) {
		munmap(ptr, total_size);
		ptr = NULL;
	}
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		close_hugetlbfs(level);
	}

	return -ENOMEM;
}

void hugetlb_unsetup_memory(struct vmctx *ctx)
{
	int level;

	if (total_size > 0) {
		munmap(ptr, total_size);
		total_size = 0;
		ptr = NULL;
	}

	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		close_hugetlbfs(level);
	}
}

bool vm_find_memfd_region(struct vmctx *ctx, vm_paddr_t gpa,
			struct vm_mem_region *ret_region)
{
	int i;
	uint64_t offset;
	struct vm_mmap_mem_region *mmap_region;
	bool ret;

	mmap_region = NULL;
	for (i = 0; i < mem_idx; i++) {
		if ((gpa >= mmap_mem_regions[i].gpa_start) &&
			(gpa < mmap_mem_regions[i].gpa_end)) {
			mmap_region = &mmap_mem_regions[i];
			break;
		}
	}

	if (mmap_region && ret_region) {
		ret = true;
		offset = gpa - mmap_region->gpa_start;
		ret_region->fd = mmap_region->fd;
		ret_region->fd_offset = offset + mmap_region->fd_offset;
	} else
		ret = false;

	return ret;
}
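
/* Reading of the checks below: bit 0 of mem_flags is set when the region is
 * (partly) backed by the 2M level, and bit 1 when it is (partly) backed by
 * the 1G level.  If a region is split across both levels (mem_flags ==
 * 0x03), the guest RAM comes from two different memfds and dmabuf sharing
 * is refused.
 */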
bool vm_allow_dmabuf(struct vmctx *ctx)
{
	uint32_t mem_flags;

	mem_flags = 0;
	if (ctx->highmem) {
		/* Check whether the highmem is used by HUGETLB_LV1/HUGETLB_LV2 */
		if ((hugetlb_priv[HUGETLB_LV1].fd > 0) &&
			(hugetlb_priv[HUGETLB_LV1].highmem))
			mem_flags |= 1;
		if ((hugetlb_priv[HUGETLB_LV2].fd > 0) &&
			(hugetlb_priv[HUGETLB_LV2].highmem))
			mem_flags |= 0x02;
		if (mem_flags == 0x03)
			return false;
	}

	if (ctx->lowmem) {
		/* Check whether the lowmem is used by HUGETLB_LV1/HUGETLB_LV2 */
		mem_flags = 0;
		if ((hugetlb_priv[HUGETLB_LV1].fd > 0) &&
			(hugetlb_priv[HUGETLB_LV1].lowmem))
			mem_flags |= 1;
		if ((hugetlb_priv[HUGETLB_LV2].fd > 0) &&
			(hugetlb_priv[HUGETLB_LV2].lowmem))
			mem_flags |= 0x02;
		if (mem_flags == 0x03)
			return false;
	}
	return true;
}