On Tue, Apr 16, 2019 at 11:41:46AM -0500, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
>
> In the CoW case, we replace the existing entry with the newly
> allocated one. We also mark the entry with PAGECACHE_TAG_TOWRITE
> so that writeback write-protects it. This helps with snapshots:
> new write page faults after a snapshot will then trigger a CoW.
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
> ---
> fs/dax.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 45fc2e18969a..d5100cbe8bd2 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -708,14 +708,15 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
> */
> static void *dax_insert_entry(struct xa_state *xas,
> struct address_space *mapping, struct vm_fault *vmf,
> - void *entry, pfn_t pfn, unsigned long flags, bool dirty)
> + void *entry, pfn_t pfn, unsigned long flags, bool dirty,
> + bool cow)
I still wish these were flags instead of double booleans that will be
easy to mix up, especially since this is a static function and nobody
else has to see the flags...
#define IE_DIRTY (1 << 0) /* mark entry and inode dirty */
#define IE_REPLACE (1 << 1) /* replacing one page with another */
...otoh maybe I'll just defer to the maintainer. :)
> {
> void *new_entry = dax_make_entry(pfn, flags);
>
> if (dirty)
> __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
>
> - if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
> + if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
> unsigned long index = xas->xa_index;
> /* we are replacing a zero page with block mapping */
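These comments need updating: with cow set, the entry being replaced is
no longer necessarily a zero page, and the "Only swap our new entry into
the page cache if..." comment further down has the same problem.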
Otherwise looks good to me...
--D
> if (dax_is_pmd_entry(entry))
> @@ -727,12 +728,12 @@ static void *dax_insert_entry(struct xa_state *xas,
>
> xas_reset(xas);
> xas_lock_irq(xas);
> - if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
> + if (cow || (dax_entry_size(entry) != dax_entry_size(new_entry))) {
> dax_disassociate_entry(entry, mapping, false);
> dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
> }
>
> - if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> + if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> /*
> * Only swap our new entry into the page cache if the current
> * entry is a zero page or an empty entry. If a normal PTE or
> @@ -752,6 +753,9 @@ static void *dax_insert_entry(struct xa_state *xas,
> if (dirty)
> xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
>
> + if (cow)
> + xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
> +
> xas_unlock_irq(xas);
> return entry;
> }
> @@ -1031,7 +1035,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
> vm_fault_t ret;
>
> *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> - DAX_ZERO_PAGE, false);
> + DAX_ZERO_PAGE, false, false);
>
> ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
> trace_dax_load_hole(inode, vmf, ret);
> @@ -1388,7 +1392,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> memset(addr, 0, PAGE_SIZE);
> }
> entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
> - 0, write && !sync);
> + 0, write && !sync,
> + iomap.type == IOMAP_DAX_COW);
>
> /*
> * If we are doing synchronous page fault and inode needs fsync,
> @@ -1467,7 +1472,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
>
> pfn = page_to_pfn_t(zero_page);
> *entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> - DAX_PMD | DAX_ZERO_PAGE, false);
> + DAX_PMD | DAX_ZERO_PAGE, false,
> + iomap->type == IOMAP_DAX_COW);
>
> ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
> if (!pmd_none(*(vmf->pmd))) {
> @@ -1590,7 +1596,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> goto finish_iomap;
>
> entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
> - DAX_PMD, write && !sync);
> + DAX_PMD, write && !sync,
> + false);
>
> /*
> * If we are doing synchronous page fault and inode needs fsync,
> --
> 2.16.4
>