mm: hugetlb_vmemmap: improve hugetlb_vmemmap code readability

There was a discussion about the naming of hugetlb_vmemmap_alloc/free in
thread [1].  David suggested renaming "alloc/free" to "optimize/restore"
to make the functionality clearer to users: "optimize" means the function
will optimize vmemmap pages, while "restore" means restoring the vmemmap
pages that were discarded before.  This commit does this.

Another point of discussion was the confusion caused by
RESERVE_VMEMMAP_NR not being used explicitly for vmemmap_addr but only
implicitly for vmemmap_end in hugetlb_vmemmap_alloc/free.  David suggested
that we compute at runtime what hugetlb_vmemmap_init() does now.  We do
not need to worry about the overhead of computing at runtime since the
calculation is simple enough and those functions are not in a hot path.
This commit has the following improvements:

  1) The function name suffix ("optimize/restore") is more expressive.
  2) The logic becomes less weird in hugetlb_vmemmap_optimize/restore().
  3) The hugetlb_vmemmap_init() does not need to be exported anymore.
  4) A ->optimize_vmemmap_pages field in struct hstate is killed.
  5) is_power_of_2(sizeof(struct page)) is now checked in only one place
     instead of two.
  6) Add more comments for hugetlb_vmemmap_optimize/restore().
  7) Originally, external users called hugetlb_optimize_vmemmap_pages()
     to detect whether a HugeTLB page's vmemmap pages are optimizable.
     This commit removes it and introduces a new helper,
     hugetlb_vmemmap_optimizable(), to replace it.  The name is more
     expressive.

Link: https://lore.kernel.org/all/20220404074652.68024-2-songmuchun@bytedance.com/ [1]
Link: https://lkml.kernel.org/r/20220628092235.91270-7-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Muchun Song 2022-06-28 17:22:33 +08:00 committed by Andrew Morton
parent 30152245c6
commit 6213834c10
5 changed files with 102 additions and 108 deletions

View File

@ -638,9 +638,6 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
unsigned int optimize_vmemmap_pages;
#endif
#ifdef CONFIG_CGROUP_HUGETLB #ifdef CONFIG_CGROUP_HUGETLB
/* cgroup control files */ /* cgroup control files */
struct cftype cgroup_files_dfl[8]; struct cftype cgroup_files_dfl[8];
@ -716,7 +713,7 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
return hstate_file(vma->vm_file); return hstate_file(vma->vm_file);
} }
static inline unsigned long huge_page_size(struct hstate *h) static inline unsigned long huge_page_size(const struct hstate *h)
{ {
return (unsigned long)PAGE_SIZE << h->order; return (unsigned long)PAGE_SIZE << h->order;
} }
@ -745,7 +742,7 @@ static inline bool hstate_is_gigantic(struct hstate *h)
return huge_page_order(h) >= MAX_ORDER; return huge_page_order(h) >= MAX_ORDER;
} }
static inline unsigned int pages_per_huge_page(struct hstate *h) static inline unsigned int pages_per_huge_page(const struct hstate *h)
{ {
return 1 << h->order; return 1 << h->order;
} }

View File

@ -268,6 +268,10 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table *
return NULL; return NULL;
} }
static inline void register_sysctl_init(const char *path, struct ctl_table *table)
{
}
static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) static inline struct ctl_table_header *register_sysctl_mount_point(const char *path)
{ {
return NULL; return NULL;

View File

@ -1535,7 +1535,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return; return;
if (hugetlb_vmemmap_alloc(h, page)) { if (hugetlb_vmemmap_restore(h, page)) {
spin_lock_irq(&hugetlb_lock); spin_lock_irq(&hugetlb_lock);
/* /*
* If we cannot allocate vmemmap pages, just refuse to free the * If we cannot allocate vmemmap pages, just refuse to free the
@ -1612,7 +1612,7 @@ static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
static inline void flush_free_hpage_work(struct hstate *h) static inline void flush_free_hpage_work(struct hstate *h)
{ {
if (hugetlb_optimize_vmemmap_pages(h)) if (hugetlb_vmemmap_optimizable(h))
flush_work(&free_hpage_work); flush_work(&free_hpage_work);
} }
@ -1734,7 +1734,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
static void __prep_new_huge_page(struct hstate *h, struct page *page) static void __prep_new_huge_page(struct hstate *h, struct page *page)
{ {
hugetlb_vmemmap_free(h, page); hugetlb_vmemmap_optimize(h, page);
INIT_LIST_HEAD(&page->lru); INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
hugetlb_set_page_subpool(page, NULL); hugetlb_set_page_subpool(page, NULL);
@ -2107,7 +2107,7 @@ int dissolve_free_huge_page(struct page *page)
* Attempt to allocate vmemmmap here so that we can take * Attempt to allocate vmemmmap here so that we can take
* appropriate action on failure. * appropriate action on failure.
*/ */
rc = hugetlb_vmemmap_alloc(h, head); rc = hugetlb_vmemmap_restore(h, head);
if (!rc) { if (!rc) {
/* /*
* Move PageHWPoison flag from head page to the raw * Move PageHWPoison flag from head page to the raw
@ -3182,8 +3182,10 @@ static void __init report_hugepages(void)
char buf[32]; char buf[32];
string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
pr_info("HugeTLB registered %s page size, pre-allocated %ld pages\n", pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n",
buf, h->free_huge_pages); buf, h->free_huge_pages);
pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n",
hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf);
} }
} }
@ -3421,7 +3423,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
remove_hugetlb_page_for_demote(h, page, false); remove_hugetlb_page_for_demote(h, page, false);
spin_unlock_irq(&hugetlb_lock); spin_unlock_irq(&hugetlb_lock);
rc = hugetlb_vmemmap_alloc(h, page); rc = hugetlb_vmemmap_restore(h, page);
if (rc) { if (rc) {
/* Allocation of vmemmmap failed, we can not demote page */ /* Allocation of vmemmmap failed, we can not demote page */
spin_lock_irq(&hugetlb_lock); spin_lock_irq(&hugetlb_lock);
@ -4111,7 +4113,6 @@ void __init hugetlb_add_hstate(unsigned int order)
h->next_nid_to_free = first_memory_node; h->next_nid_to_free = first_memory_node;
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
huge_page_size(h)/1024); huge_page_size(h)/1024);
hugetlb_vmemmap_init(h);
parsed_hstate = h; parsed_hstate = h;
} }

View File

@ -35,16 +35,6 @@ struct vmemmap_remap_walk {
struct list_head *vmemmap_pages; struct list_head *vmemmap_pages;
}; };
/*
* There are a lot of struct page structures associated with each HugeTLB page.
* For tail pages, the value of compound_head is the same. So we can reuse first
* page of head page structures. We map the virtual addresses of all the pages
* of tail page structures to the head page struct, and then free these page
* frames. Therefore, we need to reserve one pages as vmemmap areas.
*/
#define RESERVE_VMEMMAP_NR 1U
#define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT)
static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
{ {
pmd_t __pmd; pmd_t __pmd;
@ -426,32 +416,37 @@ EXPORT_SYMBOL(hugetlb_optimize_vmemmap_key);
static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON);
core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0);
/* /**
* Previously discarded vmemmap pages will be allocated and remapping * hugetlb_vmemmap_restore - restore previously optimized (by
* after this function returns zero. * hugetlb_vmemmap_optimize()) vmemmap pages which
* will be reallocated and remapped.
* @h: struct hstate.
* @head: the head page whose vmemmap pages will be restored.
*
* Return: %0 if @head's vmemmap pages have been reallocated and remapped,
* negative error code otherwise.
*/ */
int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
{ {
int ret; int ret;
unsigned long vmemmap_addr = (unsigned long)head; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; unsigned long vmemmap_reuse;
if (!HPageVmemmapOptimized(head)) if (!HPageVmemmapOptimized(head))
return 0; return 0;
vmemmap_addr += RESERVE_VMEMMAP_SIZE; vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h);
vmemmap_pages = hugetlb_optimize_vmemmap_pages(h); vmemmap_reuse = vmemmap_start;
vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE;
vmemmap_reuse = vmemmap_addr - PAGE_SIZE;
/* /*
* The pages which the vmemmap virtual address range [@vmemmap_addr, * The pages which the vmemmap virtual address range [@vmemmap_start,
* @vmemmap_end) are mapped to are freed to the buddy allocator, and * @vmemmap_end) are mapped to are freed to the buddy allocator, and
* the range is mapped to the page which @vmemmap_reuse is mapped to. * the range is mapped to the page which @vmemmap_reuse is mapped to.
* When a HugeTLB page is freed to the buddy allocator, previously * When a HugeTLB page is freed to the buddy allocator, previously
* discarded vmemmap pages must be allocated and remapping. * discarded vmemmap pages must be allocated and remapping.
*/ */
ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse, ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse,
GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE); GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE);
if (!ret) { if (!ret) {
ClearHPageVmemmapOptimized(head); ClearHPageVmemmapOptimized(head);
@ -461,11 +456,14 @@ int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
return ret; return ret;
} }
static unsigned int vmemmap_optimizable_pages(struct hstate *h, /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
struct page *head) static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
{ {
if (!READ_ONCE(vmemmap_optimize_enabled)) if (!READ_ONCE(vmemmap_optimize_enabled))
return 0; return false;
if (!hugetlb_vmemmap_optimizable(h))
return false;
if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) { if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) {
pmd_t *pmdp, pmd; pmd_t *pmdp, pmd;
@ -508,73 +506,47 @@ static unsigned int vmemmap_optimizable_pages(struct hstate *h,
* +-------------------------------------------+ * +-------------------------------------------+
*/ */
if (PageVmemmapSelfHosted(vmemmap_page)) if (PageVmemmapSelfHosted(vmemmap_page))
return 0; return false;
} }
return hugetlb_optimize_vmemmap_pages(h); return true;
} }
void hugetlb_vmemmap_free(struct hstate *h, struct page *head) /**
* hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages.
* @h: struct hstate.
* @head: the head page whose vmemmap pages will be optimized.
*
* This function only tries to optimize @head's vmemmap pages and does not
* guarantee that the optimization will succeed after it returns. The caller
* can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages
* have been optimized.
*/
void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
{ {
unsigned long vmemmap_addr = (unsigned long)head; unsigned long vmemmap_start = (unsigned long)head, vmemmap_end;
unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; unsigned long vmemmap_reuse;
vmemmap_pages = vmemmap_optimizable_pages(h, head); if (!vmemmap_should_optimize(h, head))
if (!vmemmap_pages)
return; return;
static_branch_inc(&hugetlb_optimize_vmemmap_key); static_branch_inc(&hugetlb_optimize_vmemmap_key);
vmemmap_addr += RESERVE_VMEMMAP_SIZE; vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h);
vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); vmemmap_reuse = vmemmap_start;
vmemmap_reuse = vmemmap_addr - PAGE_SIZE; vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE;
/* /*
* Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end) * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end)
* to the page which @vmemmap_reuse is mapped to, then free the pages * to the page which @vmemmap_reuse is mapped to, then free the pages
* which the range [@vmemmap_addr, @vmemmap_end] is mapped to. * which the range [@vmemmap_start, @vmemmap_end] is mapped to.
*/ */
if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse)) if (vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse))
static_branch_dec(&hugetlb_optimize_vmemmap_key); static_branch_dec(&hugetlb_optimize_vmemmap_key);
else else
SetHPageVmemmapOptimized(head); SetHPageVmemmapOptimized(head);
} }
void __init hugetlb_vmemmap_init(struct hstate *h)
{
unsigned int nr_pages = pages_per_huge_page(h);
unsigned int vmemmap_pages;
/*
* There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct
* page structs that can be used when HVO is enabled, add a BUILD_BUG_ON
* to catch invalid usage of the tail page structs.
*/
BUILD_BUG_ON(__NR_USED_SUBPAGE >=
RESERVE_VMEMMAP_SIZE / sizeof(struct page));
if (!is_power_of_2(sizeof(struct page))) {
pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n");
return;
}
vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT;
/*
* The head page is not to be freed to buddy allocator, the other tail
* pages will map to the head page, so they can be freed.
*
* Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true
* on some architectures (e.g. aarch64). See Documentation/arm64/
* hugetlbpage.rst for more details.
*/
if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR))
h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR;
pr_info("can optimize %d vmemmap pages for %s\n",
h->optimize_vmemmap_pages, h->name);
}
#ifdef CONFIG_PROC_SYSCTL
static struct ctl_table hugetlb_vmemmap_sysctls[] = { static struct ctl_table hugetlb_vmemmap_sysctls[] = {
{ {
.procname = "hugetlb_optimize_vmemmap", .procname = "hugetlb_optimize_vmemmap",
@ -586,16 +558,21 @@ static struct ctl_table hugetlb_vmemmap_sysctls[] = {
{ } { }
}; };
static __init int hugetlb_vmemmap_sysctls_init(void) static int __init hugetlb_vmemmap_init(void)
{ {
/* /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
* If "struct page" crosses page boundaries, the vmemmap pages cannot BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE);
* be optimized.
*/
if (is_power_of_2(sizeof(struct page)))
register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
if (IS_ENABLED(CONFIG_PROC_SYSCTL)) {
const struct hstate *h;
for_each_hstate(h) {
if (hugetlb_vmemmap_optimizable(h)) {
register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
break;
}
}
}
return 0; return 0;
} }
late_initcall(hugetlb_vmemmap_sysctls_init); late_initcall(hugetlb_vmemmap_init);
#endif /* CONFIG_PROC_SYSCTL */

View File

@ -11,35 +11,50 @@
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head); int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
void hugetlb_vmemmap_free(struct hstate *h, struct page *head); void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
void hugetlb_vmemmap_init(struct hstate *h);
/* /*
* How many vmemmap pages associated with a HugeTLB page that can be * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
* optimized and freed to the buddy allocator. * Documentation/vm/vmemmap_dedup.rst.
*/ */
static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) #define HUGETLB_VMEMMAP_RESERVE_SIZE PAGE_SIZE
static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{ {
return h->optimize_vmemmap_pages; return pages_per_huge_page(h) * sizeof(struct page);
}
/*
* Return how many vmemmap size associated with a HugeTLB page that can be
* optimized and can be freed to the buddy allocator.
*/
static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
int size = hugetlb_vmemmap_size(h) - HUGETLB_VMEMMAP_RESERVE_SIZE;
if (!is_power_of_2(sizeof(struct page)))
return 0;
return size > 0 ? size : 0;
} }
#else #else
static inline int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) static inline int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
{ {
return 0; return 0;
} }
static inline void hugetlb_vmemmap_free(struct hstate *h, struct page *head) static inline void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head)
{ {
} }
static inline void hugetlb_vmemmap_init(struct hstate *h) static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
}
static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h)
{ {
return 0; return 0;
} }
#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ #endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
static inline bool hugetlb_vmemmap_optimizable(const struct hstate *h)
{
return hugetlb_vmemmap_optimizable_size(h) != 0;
}
#endif /* _LINUX_HUGETLB_VMEMMAP_H */ #endif /* _LINUX_HUGETLB_VMEMMAP_H */