mm: avoid unnecessary flush on change_huge_pmd()
Calls to change_protection_range() on THP can trigger, at least on x86,
two TLB flushes for one page: one immediately, when pmdp_invalidate() is
called by change_huge_pmd(), and then another one later (that can be
batched) when change_protection_range() finishes.

The first TLB flush is only necessary to prevent the dirty bit (and, to
a lesser extent, the access bit) from changing while the PTE is
modified. However, this is not necessary, as x86 CPUs set the dirty bit
atomically with an additional check that the PTE is (still) present.
One caveat is Intel's Knights Landing, which has a bug and does not do
so.

Leverage this behavior to eliminate the unnecessary TLB flush in
change_huge_pmd(). Introduce a new arch-specific pmdp_invalidate_ad()
that invalidates the PMD only against further hardware changes of the
access and dirty bits.

Link: https://lkml.kernel.org/r/20220401180821.1986781-4-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Nick Piggin <npiggin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent c9fe66560b
commit 4f83145721
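To make the effect concrete, here is an illustrative sketch (not part of the commit) of the flush behavior in change_huge_pmd() for a single THP, assuming the mmu_gather-based batching that earlier patches in this series introduced:

	/* Before this patch: two flushes for one page. */
	oldpmd = pmdp_invalidate(vma, addr, pmd);	/* immediate TLB flush */
	entry = pmd_modify(oldpmd, newprot);
	set_pmd_at(vma->vm_mm, addr, pmd, entry);
	tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);	/* flushed later, batched */

	/* After this patch: only the batched flush remains. */
	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);	/* no flush here */
	entry = pmd_modify(oldpmd, newprot);
	set_pmd_at(vma->vm_mm, addr, pmd, entry);
	tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);	/* flushed later, batched */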
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1168,6 +1168,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 	}
 }
 #endif
+
+#define __HAVE_ARCH_PMDP_INVALIDATE_AD
+extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
+				unsigned long address, pmd_t *pmdp);
+
 /*
  * Page table pages are page-aligned. The lower half of the top
  * level is used for userspace and the top half for the kernel.
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -608,6 +608,16 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
 
 	return young;
 }
+
+pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
+			 pmd_t *pmdp)
+{
+	/*
+	 * No flush is necessary. Once an invalid PTE is established, the PTE's
+	 * access and dirty bits cannot be updated.
+	 */
+	return pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp));
+}
 #endif
 
 /**
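Why no flush is necessary here: pmd_mkinvalid() clears the present bit, and x86 CPUs set the access and dirty bits with an atomic operation that rechecks that the entry is still present (Knights Landing being the buggy exception, handled elsewhere in this series). For reference, x86's pmd_mkinvalid() at the time of this patch looks roughly like the sketch below (quoted from memory; verify against the tree):

	static inline pmd_t pmd_mkinvalid(pmd_t pmd)
	{
		/*
		 * Clearing _PAGE_PRESENT (and _PAGE_PROTNONE) is what stops
		 * further hardware access/dirty updates to this entry.
		 */
		return pfn_pmd(pmd_pfn(pmd),
			       __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
	}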
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -570,6 +570,26 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 			    pmd_t *pmdp);
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
+
+/*
+ * pmdp_invalidate_ad() invalidates the PMD while changing a transparent
+ * hugepage mapping in the page tables. This function is similar to
+ * pmdp_invalidate(), but should only be used if the access and dirty bits
+ * would not be cleared by the software in the new PMD value. The function
+ * ensures that hardware updates of the access and dirty bits are not
+ * lost.
+ *
+ * Doing so allows certain architectures to avoid a TLB flush in most
+ * cases. Another TLB flush might still be necessary later if the PMD
+ * update itself requires one (e.g., if protection was made stricter),
+ * but even then the caller may be able to batch these TLB flushing
+ * operations, so fewer TLB flushes are needed overall.
+ */
+extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
+				unsigned long address, pmd_t *pmdp);
+#endif
+
 #ifndef __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1801,10 +1801,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
 	 * which may break userspace.
 	 *
-	 * pmdp_invalidate() is required to make sure we don't miss
+	 * pmdp_invalidate_ad() is required to make sure we don't miss
 	 * dirty/young flags set by hardware.
 	 */
-	oldpmd = pmdp_invalidate(vma, addr, pmd);
+	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
 
 	entry = pmd_modify(oldpmd, newprot);
 	if (preserve_write)
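For context, the remaining flush happens a few lines below this hunk, where change_huge_pmd() defers it to the caller's mmu_gather (unchanged by this patch; quoted from memory, so treat it as a sketch):

	set_pmd_at(mm, addr, pmd, entry);
	if (huge_pmd_needs_flush(oldpmd, entry))
		tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);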
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -201,6 +201,14 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 }
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
+pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
+			 pmd_t *pmdp)
+{
+	return pmdp_invalidate(vma, address, pmdp);
+}
+#endif
+
 #ifndef pmdp_collapse_flush
 pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
 			  pmd_t *pmdp)
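Note the design choice in the generic fallback above: an architecture that does not define __HAVE_ARCH_PMDP_INVALIDATE_AD simply keeps the old, flushing pmdp_invalidate() behavior, so the optimization is strictly opt-in per architecture and callers of pmdp_invalidate_ad() need no special casing.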