On 05/12/2018 13:28, Goldwyn Rodrigues wrote:
[...]
> +static int copy_extent_page(struct extent_map *em, void *daddr, u64 pos)
> +{
> + struct dax_device *dax_dev;
^ Spaces instead of tabs here?
> + void *saddr;
> + sector_t start;
> + size_t len;
> +
> + if (em->block_start == EXTENT_MAP_HOLE) {
> + memset(daddr, 0, PAGE_SIZE);
> + } else {
> + dax_dev = fs_dax_get_by_bdev(em->bdev);
> + start = (get_start_sect(em->bdev) << 9) + (em->block_start + (pos - em->start));
> + len = dax_direct_access(dax_dev, PHYS_PFN(start), 1, &saddr, NULL);
> + memcpy(daddr, saddr, PAGE_SIZE);
> + }
> + free_extent_map(em);
> +
> + return 0;
> +}
> +
> +
copy_extent_page() always returns 0, so why not make it void?
Plus a nit: there is a double newline above.
> +ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from)
> +{
> + ssize_t ret, done = 0, count = iov_iter_count(from);
> + struct inode *inode = file_inode(iocb->ki_filp);
^ again spaces vs tabs.
> + u64 pos = iocb->ki_pos;
> + u64 start = round_down(pos, PAGE_SIZE);
> + u64 end = round_up(pos + count, PAGE_SIZE);
> + struct extent_state *cached_state = NULL;
> + struct extent_changeset *data_reserved = NULL;
> + struct extent_map *first = NULL, *last = NULL;
> +
> + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, end - start);
> + if (ret < 0)
> + return ret;
> +
> + /* Grab a reference of the first extent to copy data */
> + if (start < pos) {
> + first = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, end - start, 0);
> + if (IS_ERR(first)) {
> + ret = PTR_ERR(first);
> + goto out2;
> + }
> + }
You're using 'end - start' at least twice here. Maybe you could move
'len' out of the loop and use it for btrfs_delalloc_reserve_space() and
btrfs_get_extent() as well?
> +
> + /* Grab a reference of the last extent to copy data */
> + if (pos + count < end) {
> + last = btrfs_get_extent(BTRFS_I(inode), NULL, 0, end - PAGE_SIZE, PAGE_SIZE, 0);
> + if (IS_ERR(last)) {
> + ret = PTR_ERR(last);
> + goto out2;
> + }
> + }
> +
> + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
> + while (done < count) {
> + struct extent_map *em;
> + struct dax_device *dax_dev;
> + int offset = pos & (PAGE_SIZE - 1);
> + u64 estart = round_down(pos, PAGE_SIZE);
> + u64 elen = end - estart;
> + size_t len = count - done;
> + sector_t dstart;
> + void *daddr;
> + ssize_t maplen;
> +
> + /* Read the current extent */
> + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, estart, elen, 0);
Spaces instead of tabs again.
> + if (IS_ERR(em)) {
> + ret = PTR_ERR(em);
> + goto out;
> + }
> +
> + /* Get a new extent */
> + ret = btrfs_get_extent_map_write(&em, NULL, inode, estart, elen);
> + if (ret < 0)
> + goto out;
> +
> + dax_dev = fs_dax_get_by_bdev(em->bdev);
> + /* Calculate start address start of destination extent */
> + dstart = (get_start_sect(em->bdev) << 9) + em->block_start;
> + maplen = dax_direct_access(dax_dev, PHYS_PFN(dstart),
> + PHYS_PFN(em->len), &daddr, NULL);
> +
> + /* Copy front of extent page */
> + if (offset)
> + ret = copy_extent_page(first, daddr, estart);
> +
> + /* Copy end of extent page */
> + if ((pos + len > estart + PAGE_SIZE) && (pos + len < em->start + em->len))
> + ret = copy_extent_page(last, daddr + em->len - PAGE_SIZE, em->start + em->len - PAGE_SIZE);
> +
> + /* Copy the data from the iter */
> + maplen = PFN_PHYS(maplen);
> + maplen -= offset;
> + ret = dax_copy_from_iter(dax_dev, dstart, daddr + offset, maplen, from);
> + if (ret < 0)
> + goto out;
> + pos += ret;
> + done += ret;
> + }
> +out:
Maybe name this label out_unlock?
> + unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
> + if (done) {
> + btrfs_update_ordered_extent(inode, start,
> + end - start, true);
> + iocb->ki_pos += done;
> + if (iocb->ki_pos > i_size_read(inode))
> + i_size_write(inode, iocb->ki_pos);
> + }
> +
> + btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
> +out2:
Maybe name this label out?
> + if (count - done > 0)
> + btrfs_delalloc_release_space(inode, data_reserved, pos,
> + count - done, true);
> + extent_changeset_free(data_reserved);
> + return done ? done : ret;
> +
> +}
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index ef6ed93f44d1..29a3b12e6660 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1964,7 +1964,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
> if (sync)
> atomic_inc(&BTRFS_I(inode)->sync_writers);
>
> - if (iocb->ki_flags & IOCB_DIRECT) {
> + if (IS_DAX(inode)) {
> + num_written = btrfs_file_dax_write(iocb, from);
> + } else if (iocb->ki_flags & IOCB_DIRECT) {
> num_written = __btrfs_direct_write(iocb, from);
> } else {
> num_written = btrfs_buffered_write(iocb, from);
>
--
Johannes Thumshirn SUSE Labs Filesystems
jthumshirn@xxxxxxx +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850