[PATCH] mm: implement POSIX_FADV_NOREUSE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Backups, logrotation and indexers don't need files they read to remain
in the page cache. Their pages can be reclaimed early and should not
displace useful pages. POSIX specifices the POSIX_FADV_NOREUSE flag for
these use cases but it's currently a noop.

In our implementation pages marked with the NoReuse flag are added to
the tail of the LRU list the first time they are read. Therefore they
are the first to be reclaimed.

We needed to add flags to the file and page structs in order to pass
down the hint to the actual call to list_add.

Signed-off-by: Matthias Wirth <matthias.wirth@xxxxxxxxx>
Signed-off-by: Lukas Senger <lukas@xxxxxxxxxxxx>
---
 include/linux/fs.h         | 3 +++
 include/linux/mm_inline.h  | 7 ++++++-
 include/linux/page-flags.h | 2 ++
 mm/fadvise.c               | 4 ++++
 mm/filemap.c               | 3 +++
 mm/page_alloc.c            | 1 +
 mm/readahead.c             | 2 ++
 7 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 881accf..3e80149 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File is opened with O_PATH; almost nothing can be done with it */
 #define FMODE_PATH		((__force fmode_t)0x4000)
 
+/* Expect one read only (effect on page cache behavior) */
+#define FMODE_NOREUSE		((__force fmode_t)0x8000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
 
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index cf55945..1bed771 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -27,7 +27,12 @@ static __always_inline void add_page_to_lru_list(struct page *page,
 {
 	int nr_pages = hpage_nr_pages(page);
 	mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
-	list_add(&page->lru, &lruvec->lists[lru]);
+	if (unlikely(PageNoReuse(page))) {
+		ClearPageNoReuse(page);
+		list_add_tail(&page->lru, &lruvec->lists[lru]);
+	} else {
+		list_add(&page->lru, &lruvec->lists[lru]);
+	}
 	__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
 }
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d1fe1a7..ee5af4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,6 +109,7 @@ enum pageflags {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
 #endif
+	PG_noreuse,		/* page is added to tail of LRU list */
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -206,6 +207,7 @@ __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
 PAGEFLAG(SavePinned, savepinned);			/* Xen */
+PAGEFLAG(NoReuse, noreuse);
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 3bcfd81..387d10a 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -80,6 +80,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 		f.file->f_ra.ra_pages = bdi->ra_pages;
 		spin_lock(&f.file->f_lock);
 		f.file->f_mode &= ~FMODE_RANDOM;
+		f.file->f_mode &= ~FMODE_NOREUSE;
 		spin_unlock(&f.file->f_lock);
 		break;
 	case POSIX_FADV_RANDOM:
@@ -111,6 +112,9 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 					   nrpages);
 		break;
 	case POSIX_FADV_NOREUSE:
+		spin_lock(&f.file->f_lock);
+		f.file->f_mode |= FMODE_NOREUSE;
+		spin_unlock(&f.file->f_lock);
 		break;
 	case POSIX_FADV_DONTNEED:
 		if (!bdi_write_congested(mapping->backing_dev_info))
diff --git a/mm/filemap.c b/mm/filemap.c
index 97474c1..8f57ca8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1630,6 +1630,9 @@ no_cached_page:
 			desc->error = -ENOMEM;
 			goto out;
 		}
+		if (filp->f_mode & FMODE_NOREUSE)
+			SetPageNoReuse(page);
+
 		error = add_to_page_cache_lru(page, mapping,
 						index, GFP_KERNEL);
 		if (error) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 336ee92..a756165 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6512,6 +6512,7 @@ static const struct trace_print_flags pageflag_names[] = {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	{1UL << PG_compound_lock,	"compound_lock"	},
 #endif
+	{1UL << PG_noreuse,		"noreuse"	},
 };
 
 static void dump_page_flags(unsigned long flags)
diff --git a/mm/readahead.c b/mm/readahead.c
index 29c5e1a..e8d9221 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -189,6 +189,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		list_add(&page->lru, &page_pool);
 		if (page_idx == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
+		if (filp->f_mode & FMODE_NOREUSE)
+			SetPageNoReuse(page);
 		ret++;
 	}
 
-- 
1.8.3.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]