tmpfs: radix_tree_preloading

Nick has observed that shmem.c still uses GFP_ATOMIC when adding to page cache or swap cache, without any radix tree preload: so tending to deplete emergency reserves of memory. GFP_ATOMIC remains appropriate in shmem_writepage's add_to_swap_cache: it's being called under memory pressure, so must not wait for more memory to become available. But shmem_unuse_inode now has a window in which it can and should preload with GFP_KERNEL, and say GFP_NOWAIT instead of GFP_ATOMIC in its add_to_page_cache. shmem_getpage is not so straightforward: its filepage/swappage integrity relies upon exchanging between caches under spinlock, and it would need a lot of restructuring to place the preloads correctly. Instead, follow its pattern of retrying on races: use GFP_NOWAIT instead of GFP_ATOMIC in add_to_page_cache, and begin each circuit of the repeat loop with a sleeping radix_tree_preload, followed immediately by radix_tree_preload_end - that won't guarantee success in the next add_to_page_cache, but doesn't need to. And we can then remove that bothersome congestion_wait: when needed, it'll automatically get done in the course of the radix_tree_preload. Signed-off-by: Hugh Dickins <hugh@veritas.com> Looks-good-to: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-04 22:28:54 -08:00 · 2008-02-04 22:28:54 -08:00 · b409f9fcf0
parent 2e0e26c76a
commit b409f9fcf0
1 changed files with 18 additions and 7 deletions
--- a/mm/shmem.c
+++ b/mm/shmem.c
@ -901,12 +901,16 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	error = 1;
 	if (!inode)
 		goto out;
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto out;
+	error = 1;

 	spin_lock(&info->lock);
 	ptr = shmem_swp_entry(info, idx, NULL);
 	if (ptr && ptr->val == entry.val)
 		error = add_to_page_cache(page, inode->i_mapping,
-						idx, GFP_ATOMIC);
+						idx, GFP_NOWAIT);
 	if (error == -EEXIST) {
 		struct page *filepage = find_get_page(inode->i_mapping, idx);
 		error = 1;
@ -931,6 +935,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
 	if (ptr)
 		shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
+	radix_tree_preload_end();
 out:
 	unlock_page(page);
 	page_cache_release(page);
@ -1185,6 +1190,16 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 		goto done;
 	error = 0;
 	gfp = mapping_gfp_mask(mapping);
+	if (!filepage) {
+		/*
+		 * Try to preload while we can wait, to not make a habit of
+		 * draining atomic reserves; but don't latch on to this cpu.
+		 */
+		error = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
+		if (error)
+			goto failed;
+		radix_tree_preload_end();
+	}

 	spin_lock(&info->lock);
 	shmem_recalc_inode(inode);
@ -1266,7 +1281,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 			set_page_dirty(filepage);
 			swap_free(swap);
 		} else if (!(error = add_to_page_cache(
-				swappage, mapping, idx, GFP_ATOMIC))) {
+				swappage, mapping, idx, GFP_NOWAIT))) {
 			info->flags |= SHMEM_PAGEIN;
 			shmem_swp_set(info, entry, 0);
 			shmem_swp_unmap(entry);
@ -1280,10 +1295,6 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
 			page_cache_release(swappage);
-			if (error == -ENOMEM) {
-				/* let kswapd refresh zone for GFP_ATOMICs */
-				congestion_wait(WRITE, HZ/50);
-			}
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
@ -1338,7 +1349,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 				shmem_swp_unmap(entry);
 			}
 			if (error || swap.val || 0 != add_to_page_cache_lru(
-					filepage, mapping, idx, GFP_ATOMIC)) {
+					filepage, mapping, idx, GFP_NOWAIT)) {
 				spin_unlock(&info->lock);
 				page_cache_release(filepage);
 				shmem_unacct_blocks(info->flags, 1);