From 68f2736a858324c3ec852f6c2cddd9d1c777357d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Jun 2022 15:38:48 -0400 Subject: [PATCH] mm: Convert all PageMovable users to movable_operations These drivers are rather uncomfortably hammered into the address_space_operations hole. They aren't filesystems and don't behave like filesystems. They just need their own movable_operations structure, which we can point to directly from page->mapping. Signed-off-by: Matthew Wilcox (Oracle) --- Documentation/filesystems/locking.rst | 4 - Documentation/filesystems/vfs.rst | 12 --- Documentation/vm/page_migration.rst | 113 +++----------------------- arch/powerpc/platforms/pseries/cmm.c | 60 +------------- drivers/misc/vmw_balloon.c | 61 +------------- drivers/virtio/virtio_balloon.c | 47 +---------- include/linux/balloon_compaction.h | 6 +- include/linux/fs.h | 2 - include/linux/migrate.h | 56 +++++++++++-- include/linux/page-flags.h | 2 +- include/uapi/linux/magic.h | 4 - mm/balloon_compaction.c | 10 +-- mm/compaction.c | 29 +++---- mm/migrate.c | 24 +++--- mm/util.c | 4 +- mm/z3fold.c | 84 ++----------------- mm/zsmalloc.c | 102 +++++------------------ 17 files changed, 134 insertions(+), 486 deletions(-) diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index c0fe711f14d3..9963d9600b71 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -252,9 +252,7 @@ prototypes:: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); - bool (*isolate_page) (struct page *, isolate_mode_t); int (*migratepage)(struct address_space *, struct page *, struct page *); - void (*putback_page) (struct page *); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); @@ -280,9 +278,7 @@ invalidate_folio: yes exclusive release_folio: yes free_folio: yes direct_IO: -isolate_page: yes migratepage: yes (both) -putback_page: yes launder_folio: yes is_partially_uptodate: yes error_remove_page: yes diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index a08c652467d7..b51665cdabc4 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -737,12 +737,8 @@ cache in your filesystem. The following members are defined: bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); - /* isolate a page for migration */ - bool (*isolate_page) (struct page *, isolate_mode_t); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); - /* put migration-failed page back to right list */ - void (*putback_page) (struct page *); int (*launder_folio) (struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, @@ -930,11 +926,6 @@ cache in your filesystem. The following members are defined: data directly between the storage and the application's address space. -``isolate_page`` - Called by the VM when isolating a movable non-lru page. If page - is successfully isolated, VM marks the page as PG_isolated via - __SetPageIsolated. - ``migrate_page`` This is used to compact the physical memory usage. If the VM wants to relocate a page (maybe off a memory card that is @@ -942,9 +933,6 @@ cache in your filesystem. 
The following members are defined: page to this function. migrate_page should transfer any private data across and update any references that it has to the page. -``putback_page`` - Called by the VM when isolated page's migration fails. - ``launder_folio`` Called before freeing a folio - it writes back the dirty folio. To prevent redirtying the folio, it is kept locked during the diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst index 8c5cb8147e55..11493bad7112 100644 --- a/Documentation/vm/page_migration.rst +++ b/Documentation/vm/page_migration.rst @@ -152,110 +152,15 @@ Steps: Non-LRU page migration ====================== -Although migration originally aimed for reducing the latency of memory accesses -for NUMA, compaction also uses migration to create high-order pages. +Although migration originally aimed for reducing the latency of memory +accesses for NUMA, compaction also uses migration to create high-order +pages. For compaction purposes, it is also useful to be able to move +non-LRU pages, such as zsmalloc and virtio-balloon pages. -Current problem of the implementation is that it is designed to migrate only -*LRU* pages. However, there are potential non-LRU pages which can be migrated -in drivers, for example, zsmalloc, virtio-balloon pages. - -For virtio-balloon pages, some parts of migration code path have been hooked -up and added virtio-balloon specific functions to intercept migration logics. -It's too specific to a driver so other drivers who want to make their pages -movable would have to add their own specific hooks in the migration path. - -To overcome the problem, VM supports non-LRU page migration which provides -generic functions for non-LRU movable pages without driver specific hooks -in the migration path. - -If a driver wants to make its pages movable, it should define three functions -which are function pointers of struct address_space_operations. - -1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);`` - - What VM expects from isolate_page() function of driver is to return *true* - if driver isolates the page successfully. On returning true, VM marks the page - as PG_isolated so concurrent isolation in several CPUs skip the page - for isolation. If a driver cannot isolate the page, it should return *false*. - - Once page is successfully isolated, VM uses page.lru fields so driver - shouldn't expect to preserve values in those fields. - -2. ``int (*migratepage) (struct address_space *mapping,`` -| ``struct page *newpage, struct page *oldpage, enum migrate_mode);`` - - After isolation, VM calls migratepage() of driver with the isolated page. - The function of migratepage() is to move the contents of the old page to the - new page - and set up fields of struct page newpage. Keep in mind that you should - indicate to the VM the oldpage is no longer movable via __ClearPageMovable() - under page_lock if you migrated the oldpage successfully and returned - MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver - can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time - because VM interprets -EAGAIN as "temporary migration failure". On returning - any error except -EAGAIN, VM will give up the page migration without - retrying. - - Driver shouldn't touch the page.lru field while in the migratepage() function. - -3. 
``void (*putback_page)(struct page *);`` - - If migration fails on the isolated page, VM should return the isolated page - to the driver so VM calls the driver's putback_page() with the isolated page. - In this function, the driver should put the isolated page back into its own data - structure. - -Non-LRU movable page flags - - There are two page flags for supporting non-LRU movable page. - - * PG_movable - - Driver should use the function below to make page movable under page_lock:: - - void __SetPageMovable(struct page *page, struct address_space *mapping) - - It needs argument of address_space for registering migration - family functions which will be called by VM. Exactly speaking, - PG_movable is not a real flag of struct page. Rather, VM - reuses the page->mapping's lower bits to represent it:: - - #define PAGE_MAPPING_MOVABLE 0x2 - page->mapping = page->mapping | PAGE_MAPPING_MOVABLE; - - so driver shouldn't access page->mapping directly. Instead, driver should - use page_mapping() which masks off the low two bits of page->mapping under - page lock so it can get the right struct address_space. - - For testing of non-LRU movable pages, VM supports __PageMovable() function. - However, it doesn't guarantee to identify non-LRU movable pages because - the page->mapping field is unified with other variables in struct page. - If the driver releases the page after isolation by VM, page->mapping - doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set - (look at __ClearPageMovable). But __PageMovable() is cheap to call whether - page is LRU or non-LRU movable once the page has been isolated because LRU - pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also - good for just peeking to test non-LRU movable pages before more expensive - checking with lock_page() in pfn scanning to select a victim. - - For guaranteeing non-LRU movable page, VM provides PageMovable() function. - Unlike __PageMovable(), PageMovable() validates page->mapping and - mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents - sudden destroying of page->mapping. - - Drivers using __SetPageMovable() should clear the flag via - __ClearMovablePage() under page_lock() before the releasing the page. - - * PG_isolated - - To prevent concurrent isolation among several CPUs, VM marks isolated page - as PG_isolated under lock_page(). So if a CPU encounters PG_isolated - non-LRU movable page, it can skip it. Driver doesn't need to manipulate the - flag because VM will set/clear it automatically. Keep in mind that if the - driver sees a PG_isolated page, it means the page has been isolated by the - VM so it shouldn't touch the page.lru field. - The PG_isolated flag is aliased with the PG_reclaim flag so drivers - shouldn't use PG_isolated for its own purposes. +If a driver wants to make its pages movable, it should define a struct +movable_operations. It then needs to call __SetPageMovable() on each +page that it may be able to move. This uses the ``page->mapping`` field, +so this field is not available for the driver to use for other purposes. Monitoring Migration ===================== @@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase. Christoph Lameter, May 8, 2006. Minchan Kim, Mar 28, 2016. + +.. 
kernel-doc:: include/linux/migrate.h diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 15ed8206c463..5f4037c1d7fe 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -19,9 +19,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = { }; #ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; - -static int cmm_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "ppc-cmm", - .init_fs_context = cmm_init_fs_context, - .kill_sb = kill_anon_super, -}; - static int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage, struct page *page, enum migrate_mode mode) @@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info, return MIGRATEPAGE_SUCCESS; } -static int cmm_balloon_compaction_init(void) +static void cmm_balloon_compaction_init(void) { - int rc; - balloon_devinfo_init(&b_dev_info); b_dev_info.migratepage = cmm_migratepage; - - balloon_mnt = kern_mount(&balloon_fs); - if (IS_ERR(balloon_mnt)) { - rc = PTR_ERR(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); - if (IS_ERR(b_dev_info.inode)) { - rc = PTR_ERR(b_dev_info.inode); - b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; - return rc; - } - - b_dev_info.inode->i_mapping->a_ops = &balloon_aops; - return 0; -} -static void cmm_balloon_compaction_deinit(void) -{ - if (b_dev_info.inode) - iput(b_dev_info.inode); - b_dev_info.inode = NULL; - kern_unmount(balloon_mnt); - balloon_mnt = NULL; } #else /* CONFIG_BALLOON_COMPACTION */ -static int cmm_balloon_compaction_init(void) -{ - return 0; -} - -static void cmm_balloon_compaction_deinit(void) +static void cmm_balloon_compaction_init(void) { } #endif /* CONFIG_BALLOON_COMPACTION */ @@ -622,9 +572,7 @@ static int cmm_init(void) if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) return -EOPNOTSUPP; - rc = cmm_balloon_compaction_init(); - if (rc) - return rc; + cmm_balloon_compaction_init(); rc = register_oom_notifier(&cmm_oom_nb); if (rc < 0) @@ -658,7 +606,6 @@ static int cmm_init(void) out_oom_notifier: unregister_oom_notifier(&cmm_oom_nb); out_balloon_compaction: - cmm_balloon_compaction_deinit(); return rc; } @@ -677,7 +624,6 @@ static void cmm_exit(void) unregister_memory_notifier(&cmm_mem_nb); cmm_free_pages(atomic_long_read(&loaned_pages)); cmm_unregister_sysfs(&cmm_dev); - cmm_balloon_compaction_deinit(); } /** diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 086ce77d9074..85dd6aa33df6 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -29,8 +29,6 @@ #include #include #include -#include -#include #include #include #include @@ -1730,20 +1728,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) #ifdef CONFIG_BALLOON_COMPACTION - -static int vmballoon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type vmballoon_fs = { - .name = "balloon-vmware", - .init_fs_context = vmballoon_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static struct vfsmount *vmballoon_mnt; - /** * vmballoon_migratepage() - migrates a balloon page. * @b_dev_info: balloon device information descriptor. 
@@ -1862,21 +1846,6 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, return ret; } -/** - * vmballoon_compaction_deinit() - removes compaction related data. - * - * @b: pointer to the balloon. - */ -static void vmballoon_compaction_deinit(struct vmballoon *b) -{ - if (!IS_ERR(b->b_dev_info.inode)) - iput(b->b_dev_info.inode); - - b->b_dev_info.inode = NULL; - kern_unmount(vmballoon_mnt); - vmballoon_mnt = NULL; -} - /** * vmballoon_compaction_init() - initialized compaction for the balloon. * @@ -1888,33 +1857,15 @@ static void vmballoon_compaction_deinit(struct vmballoon *b) * * Return: zero on success or error code on failure. */ -static __init int vmballoon_compaction_init(struct vmballoon *b) +static __init void vmballoon_compaction_init(struct vmballoon *b) { - vmballoon_mnt = kern_mount(&vmballoon_fs); - if (IS_ERR(vmballoon_mnt)) - return PTR_ERR(vmballoon_mnt); - b->b_dev_info.migratepage = vmballoon_migratepage; - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb); - - if (IS_ERR(b->b_dev_info.inode)) - return PTR_ERR(b->b_dev_info.inode); - - b->b_dev_info.inode->i_mapping->a_ops = &balloon_aops; - return 0; } #else /* CONFIG_BALLOON_COMPACTION */ - -static void vmballoon_compaction_deinit(struct vmballoon *b) +static inline void vmballoon_compaction_init(struct vmballoon *b) { } - -static int vmballoon_compaction_init(struct vmballoon *b) -{ - return 0; -} - #endif /* CONFIG_BALLOON_COMPACTION */ static int __init vmballoon_init(void) @@ -1939,9 +1890,7 @@ static int __init vmballoon_init(void) * balloon_devinfo_init() . */ balloon_devinfo_init(&balloon.b_dev_info); - error = vmballoon_compaction_init(&balloon); - if (error) - goto fail; + vmballoon_compaction_init(&balloon); INIT_LIST_HEAD(&balloon.huge_pages); spin_lock_init(&balloon.comm_lock); @@ -1958,7 +1907,6 @@ static int __init vmballoon_init(void) return 0; fail: vmballoon_unregister_shrinker(&balloon); - vmballoon_compaction_deinit(&balloon); return error; } @@ -1985,8 +1933,5 @@ static void __exit vmballoon_exit(void) */ vmballoon_send_start(&balloon, 0); vmballoon_pop(&balloon); - - /* Only once we popped the balloon, compaction can be deinit */ - vmballoon_compaction_deinit(&balloon); } module_exit(vmballoon_exit); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index b9737da6c4dd..bd360b91e9d3 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -17,9 +17,6 @@ #include #include #include -#include -#include -#include #include /* @@ -42,10 +39,6 @@ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT)) #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER) -#ifdef CONFIG_BALLOON_COMPACTION -static struct vfsmount *balloon_mnt; -#endif - enum virtio_balloon_vq { VIRTIO_BALLOON_VQ_INFLATE, VIRTIO_BALLOON_VQ_DEFLATE, @@ -805,18 +798,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, return MIGRATEPAGE_SUCCESS; } - -static int balloon_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 
0 : -ENOMEM; -} - -static struct file_system_type balloon_fs = { - .name = "balloon-kvm", - .init_fs_context = balloon_init_fs_context, - .kill_sb = kill_anon_super, -}; - #endif /* CONFIG_BALLOON_COMPACTION */ static unsigned long shrink_free_pages(struct virtio_balloon *vb, @@ -909,19 +890,7 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_free_vb; #ifdef CONFIG_BALLOON_COMPACTION - balloon_mnt = kern_mount(&balloon_fs); - if (IS_ERR(balloon_mnt)) { - err = PTR_ERR(balloon_mnt); - goto out_del_vqs; - } - vb->vb_dev_info.migratepage = virtballoon_migratepage; - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); - if (IS_ERR(vb->vb_dev_info.inode)) { - err = PTR_ERR(vb->vb_dev_info.inode); - goto out_kern_unmount; - } - vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { /* @@ -930,13 +899,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_iput; + goto out_del_vqs; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_iput; + goto out_del_vqs; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -1030,13 +999,7 @@ static int virtballoon_probe(struct virtio_device *vdev) out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); -out_iput: -#ifdef CONFIG_BALLOON_COMPACTION - iput(vb->vb_dev_info.inode); -out_kern_unmount: - kern_unmount(balloon_mnt); out_del_vqs: -#endif vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -1083,12 +1046,6 @@ static void virtballoon_remove(struct virtio_device *vdev) } remove_common(vb); -#ifdef CONFIG_BALLOON_COMPACTION - if (vb->vb_dev_info.inode) - iput(vb->vb_dev_info.inode); - - kern_unmount(balloon_mnt); -#endif kfree(vb); } diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index edb7f6d41faa..5ca2d5699620 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -57,7 +57,6 @@ struct balloon_dev_info { struct list_head pages; /* Pages enqueued & handled to Host */ int (*migratepage)(struct balloon_dev_info *, struct page *newpage, struct page *page, enum migrate_mode mode); - struct inode *inode; }; extern struct page *balloon_page_alloc(void); @@ -75,11 +74,10 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) spin_lock_init(&balloon->pages_lock); INIT_LIST_HEAD(&balloon->pages); balloon->migratepage = NULL; - balloon->inode = NULL; } #ifdef CONFIG_BALLOON_COMPACTION -extern const struct address_space_operations balloon_aops; +extern const struct movable_operations balloon_mops; /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -94,7 +92,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon, struct page *page) { __SetPageOffline(page); - __SetPageMovable(page, balloon->inode->i_mapping); + __SetPageMovable(page, &balloon_mops); set_page_private(page, (unsigned long)balloon); list_add(&page->lru, &balloon->pages); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 9ad5e3520fae..5d8ee3155ca2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -367,8 +367,6 @@ struct address_space_operations { */ int (*migratepage) (struct address_space *, struct page *, struct page *, enum 
migrate_mode);
-	bool (*isolate_page)(struct page *, isolate_mode_t);
-	void (*putback_page)(struct page *);
 	int (*launder_folio)(struct folio *);
 	bool (*is_partially_uptodate) (struct folio *, size_t from,
 			size_t count);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 069a89e847f3..82c735ba6109 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -19,6 +19,43 @@ struct migration_target_control;
  */
 #define MIGRATEPAGE_SUCCESS		0
 
+/**
+ * struct movable_operations - Driver page migration
+ * @isolate_page:
+ * The VM calls this function to prepare the page to be moved.  The page
+ * is locked and the driver should not unlock it.  The driver should
+ * return ``true`` if the page is movable and ``false`` if it is not
+ * currently movable.  After this function returns, the VM uses the
+ * page->lru field, so the driver must preserve any information which
+ * is usually stored here.
+ *
+ * @migrate_page:
+ * After isolation, the VM calls this function with the isolated
+ * @src page.  The driver should copy the contents of the
+ * @src page to the @dst page and set up the fields of @dst page.
+ * Both pages are locked.
+ * If page migration is successful, the driver should call
+ * __ClearPageMovable(@src) and return MIGRATEPAGE_SUCCESS.
+ * If the driver cannot migrate the page at the moment, it can return
+ * -EAGAIN.  The VM interprets this as a temporary migration failure and
+ * will retry it later.  Any other error value is a permanent migration
+ * failure and migration will not be retried.
+ * The driver shouldn't touch the @src->lru field while in the
+ * migrate_page() function.  It may write to @dst->lru.
+ *
+ * @putback_page:
+ * If migration fails on the isolated page, the VM informs the driver
+ * that the page is no longer a candidate for migration by calling
+ * this function.  The driver should put the isolated page back into
+ * its own data structure.
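+ *
+ * A driver typically defines one file-scope instance and registers each
+ * page it may be able to move against it while holding the page lock.
+ * A sketch, with hypothetical ``foo_*`` names::
+ *
+ *	static const struct movable_operations foo_mops = {
+ *		.isolate_page	= foo_isolate_page,
+ *		.migrate_page	= foo_migrate_page,
+ *		.putback_page	= foo_putback_page,
+ *	};
+ *
+ *	lock_page(page);
+ *	__SetPageMovable(page, &foo_mops);
+ *	unlock_page(page);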
+ */ +struct movable_operations { + bool (*isolate_page)(struct page *, isolate_mode_t); + int (*migrate_page)(struct page *dst, struct page *src, + enum migrate_mode); + void (*putback_page)(struct page *); +}; + /* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; @@ -91,13 +128,13 @@ static inline int next_demotion_node(int node) #endif #ifdef CONFIG_COMPACTION -extern int PageMovable(struct page *page); -extern void __SetPageMovable(struct page *page, struct address_space *mapping); -extern void __ClearPageMovable(struct page *page); +bool PageMovable(struct page *page); +void __SetPageMovable(struct page *page, const struct movable_operations *ops); +void __ClearPageMovable(struct page *page); #else -static inline int PageMovable(struct page *page) { return 0; } +static inline bool PageMovable(struct page *page) { return false; } static inline void __SetPageMovable(struct page *page, - struct address_space *mapping) + const struct movable_operations *ops) { } static inline void __ClearPageMovable(struct page *page) @@ -110,6 +147,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *page_movable_ops(struct page *page) +{ + VM_BUG_ON(!__PageMovable(page)); + + return (const struct movable_operations *) + ((unsigned long)page->mapping - PAGE_MAPPING_MOVABLE); +} + #ifdef CONFIG_NUMA_BALANCING extern int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e66f7aa3191d..3f5490f6f038 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -639,7 +639,7 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points a struct address_space. + * page and then page->mapping points to a struct movable_operations. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index f724129c0425..6325d1d0e90f 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -98,12 +98,8 @@ /* Since UDF 2.01 is ISO 13346 based... 
*/ #define UDF_SUPER_MAGIC 0x15013346 -#define BALLOON_KVM_MAGIC 0x13661366 -#define ZSMALLOC_MAGIC 0x58295829 #define DMA_BUF_MAGIC 0x444d4142 /* "DMAB" */ #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ -#define Z3FOLD_MAGIC 0x33 -#define PPC_CMM_MAGIC 0xc7571590 #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ #endif /* __LINUX_MAGIC_H__ */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 4b8eab4b3f45..22c96fed70b5 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -228,10 +228,8 @@ static void balloon_page_putback(struct page *page) spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); } - /* move_to_new_page() counterpart for a ballooned page */ -static int balloon_page_migrate(struct address_space *mapping, - struct page *newpage, struct page *page, +static int balloon_page_migrate(struct page *newpage, struct page *page, enum migrate_mode mode) { struct balloon_dev_info *balloon = balloon_page_device(page); @@ -250,11 +248,11 @@ static int balloon_page_migrate(struct address_space *mapping, return balloon->migratepage(balloon, newpage, page, mode); } -const struct address_space_operations balloon_aops = { - .migratepage = balloon_page_migrate, +const struct movable_operations balloon_mops = { + .migrate_page = balloon_page_migrate, .isolate_page = balloon_page_isolate, .putback_page = balloon_page_putback, }; -EXPORT_SYMBOL_GPL(balloon_aops); +EXPORT_SYMBOL_GPL(balloon_mops); #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/compaction.c b/mm/compaction.c index 1f89b969c12b..f23efba1d118 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -110,28 +110,27 @@ static void split_map_pages(struct list_head *list) } #ifdef CONFIG_COMPACTION - -int PageMovable(struct page *page) +bool PageMovable(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops; VM_BUG_ON_PAGE(!PageLocked(page), page); if (!__PageMovable(page)) - return 0; + return false; - mapping = page_mapping(page); - if (mapping && mapping->a_ops && mapping->a_ops->isolate_page) - return 1; + mops = page_movable_ops(page); + if (mops) + return true; - return 0; + return false; } EXPORT_SYMBOL(PageMovable); -void __SetPageMovable(struct page *page, struct address_space *mapping) +void __SetPageMovable(struct page *page, const struct movable_operations *mops) { VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page); - page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE); + VM_BUG_ON_PAGE((unsigned long)mops & PAGE_MAPPING_MOVABLE, page); + page->mapping = (void *)((unsigned long)mops | PAGE_MAPPING_MOVABLE); } EXPORT_SYMBOL(__SetPageMovable); @@ -139,12 +138,10 @@ void __ClearPageMovable(struct page *page) { VM_BUG_ON_PAGE(!PageMovable(page), page); /* - * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE - * flag so that VM can catch up released page by driver after isolation. - * With it, VM migration doesn't try to put it back. + * This page still has the type of a movable page, but it's + * actually not movable any more. 
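+	 * Keep PAGE_MAPPING_MOVABLE set so that __PageMovable() still
+	 * reports the type, but drop the operations pointer:
+	 * page_movable_ops() now returns NULL and PageMovable()
+	 * returns false for this page.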
*/ - page->mapping = (void *)((unsigned long)page->mapping & - PAGE_MAPPING_MOVABLE); + page->mapping = (void *)PAGE_MAPPING_MOVABLE; } EXPORT_SYMBOL(__ClearPageMovable); diff --git a/mm/migrate.c b/mm/migrate.c index 6c1ea61f39d8..491f03747832 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -59,7 +59,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) { - struct address_space *mapping; + const struct movable_operations *mops; /* * Avoid burning cycles with pages that are yet under __free_pages(), @@ -97,10 +97,10 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) if (!PageMovable(page) || PageIsolated(page)) goto out_no_isolated; - mapping = page_mapping(page); - VM_BUG_ON_PAGE(!mapping, page); + mops = page_movable_ops(page); + VM_BUG_ON_PAGE(!mops, page); - if (!mapping->a_ops->isolate_page(page, mode)) + if (!mops->isolate_page(page, mode)) goto out_no_isolated; /* Driver shouldn't use PG_isolated bit of page->flags */ @@ -120,10 +120,9 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) static void putback_movable_page(struct page *page) { - struct address_space *mapping; + const struct movable_operations *mops = page_movable_ops(page); - mapping = page_mapping(page); - mapping->a_ops->putback_page(page); + mops->putback_page(page); ClearPageIsolated(page); } @@ -846,16 +845,15 @@ static int fallback_migrate_page(struct address_space *mapping, static int move_to_new_folio(struct folio *dst, struct folio *src, enum migrate_mode mode) { - struct address_space *mapping; int rc = -EAGAIN; bool is_lru = !__PageMovable(&src->page); VM_BUG_ON_FOLIO(!folio_test_locked(src), src); VM_BUG_ON_FOLIO(!folio_test_locked(dst), dst); - mapping = folio_mapping(src); - if (likely(is_lru)) { + struct address_space *mapping = folio_mapping(src); + if (!mapping) rc = migrate_page(mapping, &dst->page, &src->page, mode); else if (mapping->a_ops->migratepage) @@ -872,6 +870,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, rc = fallback_migrate_page(mapping, &dst->page, &src->page, mode); } else { + const struct movable_operations *mops; + /* * In case of non-lru page, it could be released after * isolation step. In that case, we shouldn't try migration. 
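	 * Such a released page keeps PAGE_MAPPING_MOVABLE but has had its
	 * movable_operations pointer cleared by __ClearPageMovable(), so
	 * it no longer tests as movable.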
@@ -883,8 +883,8 @@ static int move_to_new_folio(struct folio *dst, struct folio *src, goto out; } - rc = mapping->a_ops->migratepage(mapping, &dst->page, - &src->page, mode); + mops = page_movable_ops(&src->page); + rc = mops->migrate_page(&dst->page, &src->page, mode); WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS && !folio_test_isolated(src)); } diff --git a/mm/util.c b/mm/util.c index 0837570c9225..53af0e79d3e4 100644 --- a/mm/util.c +++ b/mm/util.c @@ -804,10 +804,10 @@ struct address_space *folio_mapping(struct folio *folio) return swap_address_space(folio_swap_entry(folio)); mapping = folio->mapping; - if ((unsigned long)mapping & PAGE_MAPPING_ANON) + if ((unsigned long)mapping & PAGE_MAPPING_FLAGS) return NULL; - return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS); + return mapping; } EXPORT_SYMBOL(folio_mapping); diff --git a/mm/z3fold.c b/mm/z3fold.c index f41f8b0d9e9a..cf71da10d04e 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -34,15 +34,11 @@ #include #include #include -#include -#include -#include #include #include #include #include #include -#include #include /* @@ -149,7 +145,6 @@ struct z3fold_header { * @compact_wq: workqueue for page layout background optimization * @release_wq: workqueue for safe page release * @work: work_struct for safe page release - * @inode: inode for z3fold pseudo filesystem * * This structure is allocated at pool creation time and maintains metadata * pertaining to a particular z3fold pool. @@ -169,7 +164,6 @@ struct z3fold_pool { struct workqueue_struct *compact_wq; struct workqueue_struct *release_wq; struct work_struct work; - struct inode *inode; }; /* @@ -334,54 +328,6 @@ static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr) } } -static int z3fold_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, Z3FOLD_MAGIC) ? 
0 : -ENOMEM; -} - -static struct file_system_type z3fold_fs = { - .name = "z3fold", - .init_fs_context = z3fold_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static struct vfsmount *z3fold_mnt; -static int __init z3fold_mount(void) -{ - int ret = 0; - - z3fold_mnt = kern_mount(&z3fold_fs); - if (IS_ERR(z3fold_mnt)) - ret = PTR_ERR(z3fold_mnt); - - return ret; -} - -static void z3fold_unmount(void) -{ - kern_unmount(z3fold_mnt); -} - -static const struct address_space_operations z3fold_aops; -static int z3fold_register_migration(struct z3fold_pool *pool) -{ - pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb); - if (IS_ERR(pool->inode)) { - pool->inode = NULL; - return 1; - } - - pool->inode->i_mapping->private_data = pool; - pool->inode->i_mapping->a_ops = &z3fold_aops; - return 0; -} - -static void z3fold_unregister_migration(struct z3fold_pool *pool) -{ - if (pool->inode) - iput(pool->inode); -} - /* Initializes the z3fold header of a newly allocated z3fold page */ static struct z3fold_header *init_z3fold_page(struct page *page, bool headless, struct z3fold_pool *pool, gfp_t gfp) @@ -1002,14 +948,10 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, pool->release_wq = create_singlethread_workqueue(pool->name); if (!pool->release_wq) goto out_wq; - if (z3fold_register_migration(pool)) - goto out_rwq; INIT_WORK(&pool->work, free_pages_work); pool->ops = ops; return pool; -out_rwq: - destroy_workqueue(pool->release_wq); out_wq: destroy_workqueue(pool->compact_wq); out_unbuddied: @@ -1043,11 +985,12 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool) destroy_workqueue(pool->compact_wq); destroy_workqueue(pool->release_wq); - z3fold_unregister_migration(pool); free_percpu(pool->unbuddied); kfree(pool); } +static const struct movable_operations z3fold_mops; + /** * z3fold_alloc() - allocates a region of a given size * @pool: z3fold pool from which to allocate @@ -1117,11 +1060,11 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, } if (can_sleep) { lock_page(page); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &z3fold_mops); unlock_page(page); } else { WARN_ON(!trylock_page(page)); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &z3fold_mops); unlock_page(page); } z3fold_page_lock(zhdr); @@ -1554,12 +1497,11 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) return false; } -static int z3fold_page_migrate(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +static int z3fold_page_migrate(struct page *newpage, struct page *page, + enum migrate_mode mode) { struct z3fold_header *zhdr, *new_zhdr; struct z3fold_pool *pool; - struct address_space *new_mapping; VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); @@ -1592,7 +1534,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa * so we only have to reinitialize it. 
*/ INIT_LIST_HEAD(&new_zhdr->buddy); - new_mapping = page_mapping(page); __ClearPageMovable(page); get_page(newpage); @@ -1608,7 +1549,7 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa spin_lock(&pool->lock); list_add(&newpage->lru, &pool->lru); spin_unlock(&pool->lock); - __SetPageMovable(newpage, new_mapping); + __SetPageMovable(newpage, &z3fold_mops); z3fold_page_unlock(new_zhdr); queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); @@ -1642,9 +1583,9 @@ static void z3fold_page_putback(struct page *page) z3fold_page_unlock(zhdr); } -static const struct address_space_operations z3fold_aops = { +static const struct movable_operations z3fold_mops = { .isolate_page = z3fold_page_isolate, - .migratepage = z3fold_page_migrate, + .migrate_page = z3fold_page_migrate, .putback_page = z3fold_page_putback, }; @@ -1746,17 +1687,11 @@ MODULE_ALIAS("zpool-z3fold"); static int __init init_z3fold(void) { - int ret; - /* * Make sure the z3fold header is not larger than the page size and * there has remaining spaces for its buddy. */ BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE - CHUNK_SIZE); - ret = z3fold_mount(); - if (ret) - return ret; - zpool_register_driver(&z3fold_zpool_driver); return 0; @@ -1764,7 +1699,6 @@ static int __init init_z3fold(void) static void __exit exit_z3fold(void) { - z3fold_unmount(); zpool_unregister_driver(&z3fold_zpool_driver); } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5d5fc04385b8..71d6edcbea48 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -59,8 +58,6 @@ #include #include #include -#include -#include #include #include #include @@ -177,10 +174,6 @@ struct zs_size_stat { static struct dentry *zs_stat_root; #endif -#ifdef CONFIG_COMPACTION -static struct vfsmount *zsmalloc_mnt; -#endif - /* * We assign a page to ZS_ALMOST_EMPTY fullness group when: * n <= N / f, where @@ -252,7 +245,6 @@ struct zs_pool { struct dentry *stat_dentry; #endif #ifdef CONFIG_COMPACTION - struct inode *inode; struct work_struct free_work; #endif /* protect page/zspage migration */ @@ -271,6 +263,7 @@ struct zspage { unsigned int freeobj; struct page *first_page; struct list_head list; /* fullness list */ + struct zs_pool *pool; #ifdef CONFIG_COMPACTION rwlock_t lock; #endif @@ -295,8 +288,6 @@ static bool ZsHugePage(struct zspage *zspage) } #ifdef CONFIG_COMPACTION -static int zs_register_migration(struct zs_pool *pool); -static void zs_unregister_migration(struct zs_pool *pool); static void migrate_lock_init(struct zspage *zspage); static void migrate_read_lock(struct zspage *zspage); static void migrate_read_unlock(struct zspage *zspage); @@ -307,10 +298,6 @@ static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); #else -static int zsmalloc_mount(void) { return 0; } -static void zsmalloc_unmount(void) {} -static int zs_register_migration(struct zs_pool *pool) { return 0; } -static void zs_unregister_migration(struct zs_pool *pool) {} static void migrate_lock_init(struct zspage *zspage) {} static void migrate_read_lock(struct zspage *zspage) {} static void migrate_read_unlock(struct zspage *zspage) {} @@ -1083,6 +1070,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool, create_page_chain(class, zspage, pages); init_zspage(class, zspage); + zspage->pool = pool; return zspage; } @@ -1754,33 +1742,6 @@ static void 
lock_zspage(struct zspage *zspage) migrate_read_unlock(zspage); } -static int zs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type zsmalloc_fs = { - .name = "zsmalloc", - .init_fs_context = zs_init_fs_context, - .kill_sb = kill_anon_super, -}; - -static int zsmalloc_mount(void) -{ - int ret = 0; - - zsmalloc_mnt = kern_mount(&zsmalloc_fs); - if (IS_ERR(zsmalloc_mnt)) - ret = PTR_ERR(zsmalloc_mnt); - - return ret; -} - -static void zsmalloc_unmount(void) -{ - kern_unmount(zsmalloc_mnt); -} - static void migrate_lock_init(struct zspage *zspage) { rwlock_init(&zspage->lock); @@ -1823,6 +1784,8 @@ static void dec_zspage_isolation(struct zspage *zspage) zspage->isolated--; } +static const struct movable_operations zsmalloc_mops; + static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1843,7 +1806,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage, set_first_obj_offset(newpage, get_first_obj_offset(oldpage)); if (unlikely(ZsHugePage(zspage))) newpage->index = oldpage->index; - __SetPageMovable(newpage, page_mapping(oldpage)); + __SetPageMovable(newpage, &zsmalloc_mops); } static bool zs_page_isolate(struct page *page, isolate_mode_t mode) @@ -1865,8 +1828,8 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) return true; } -static int zs_page_migrate(struct address_space *mapping, struct page *newpage, - struct page *page, enum migrate_mode mode) +static int zs_page_migrate(struct page *newpage, struct page *page, + enum migrate_mode mode) { struct zs_pool *pool; struct size_class *class; @@ -1889,14 +1852,15 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - pool = mapping->private_data; + /* The page is locked, so this pointer must remain valid */ + zspage = get_zspage(page); + pool = zspage->pool; /* * The pool migrate_lock protects the race between zpage migration * and zs_free. */ write_lock(&pool->migrate_lock); - zspage = get_zspage(page); class = zspage_class(pool, zspage); /* @@ -1964,31 +1928,12 @@ static void zs_page_putback(struct page *page) migrate_write_unlock(zspage); } -static const struct address_space_operations zsmalloc_aops = { +static const struct movable_operations zsmalloc_mops = { .isolate_page = zs_page_isolate, - .migratepage = zs_page_migrate, + .migrate_page = zs_page_migrate, .putback_page = zs_page_putback, }; -static int zs_register_migration(struct zs_pool *pool) -{ - pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb); - if (IS_ERR(pool->inode)) { - pool->inode = NULL; - return 1; - } - - pool->inode->i_mapping->private_data = pool; - pool->inode->i_mapping->a_ops = &zsmalloc_aops; - return 0; -} - -static void zs_unregister_migration(struct zs_pool *pool) -{ - flush_work(&pool->free_work); - iput(pool->inode); -} - /* * Caller should hold page_lock of all pages in the zspage * In here, we cannot use zspage meta data. 
@@ -2032,6 +1977,11 @@ static void kick_deferred_free(struct zs_pool *pool) schedule_work(&pool->free_work); } +static void zs_flush_migration(struct zs_pool *pool) +{ + flush_work(&pool->free_work); +} + static void init_deferred_free(struct zs_pool *pool) { INIT_WORK(&pool->free_work, async_free_zspage); @@ -2043,10 +1993,12 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) do { WARN_ON(!trylock_page(page)); - __SetPageMovable(page, pool->inode->i_mapping); + __SetPageMovable(page, &zsmalloc_mops); unlock_page(page); } while ((page = get_next_page(page)) != NULL); } +#else +static inline void zs_flush_migration(struct zs_pool *pool) { } #endif /* @@ -2324,9 +2276,6 @@ struct zs_pool *zs_create_pool(const char *name) /* debug only, don't abort if it fails */ zs_pool_stat_create(pool, name); - if (zs_register_migration(pool)) - goto err; - /* * Not critical since shrinker is only used to trigger internal * defragmentation of the pool which is pretty optional thing. If @@ -2348,7 +2297,7 @@ void zs_destroy_pool(struct zs_pool *pool) int i; zs_unregister_shrinker(pool); - zs_unregister_migration(pool); + zs_flush_migration(pool); zs_pool_stat_destroy(pool); for (i = 0; i < ZS_SIZE_CLASSES; i++) { @@ -2380,14 +2329,10 @@ static int __init zs_init(void) { int ret; - ret = zsmalloc_mount(); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare", zs_cpu_prepare, zs_cpu_dead); if (ret) - goto hp_setup_fail; + goto out; #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); @@ -2397,8 +2342,6 @@ static int __init zs_init(void) return 0; -hp_setup_fail: - zsmalloc_unmount(); out: return ret; } @@ -2408,7 +2351,6 @@ static void __exit zs_exit(void) #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); #endif - zsmalloc_unmount(); cpuhp_remove_state(CPUHP_MM_ZS_PREPARE); zs_stat_exit();
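For reference, here is what a driver-side user of the new interface looks
like end to end.  This is a minimal sketch, not part of the patch: the
foo_* names are hypothetical, all driver bookkeeping and error handling
are omitted, and the memcpy() assumes both pages are addressable through
the direct map.

/* Hypothetical example only -- not from this patch. */
#include <linux/migrate.h>
#include <linux/mm.h>
#include <linux/string.h>

static const struct movable_operations foo_mops;

static bool foo_isolate_page(struct page *page, isolate_mode_t mode)
{
	/*
	 * The page is locked.  Detach it from the driver's internal
	 * lists here; the VM owns page->lru until migration completes
	 * or the page is put back.
	 */
	return true;
}

static int foo_migrate_page(struct page *dst, struct page *src,
			    enum migrate_mode mode)
{
	/* Both pages are locked; copy the payload across. */
	memcpy(page_address(dst), page_address(src), PAGE_SIZE);

	/* The new page becomes movable; the old one no longer is. */
	__SetPageMovable(dst, &foo_mops);
	__ClearPageMovable(src);
	return MIGRATEPAGE_SUCCESS;
}

static void foo_putback_page(struct page *page)
{
	/* Migration failed: re-attach the page to the driver's lists. */
}

static const struct movable_operations foo_mops = {
	.isolate_page	= foo_isolate_page,
	.migrate_page	= foo_migrate_page,
	.putback_page	= foo_putback_page,
};

balloon_mops, z3fold_mops and zsmalloc_mops above all follow this shape;
the structural difference from the old address_space_operations scheme is
that no pseudo filesystem, inode or magic number is needed just to carry
the pointer.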