On Wed, Jun 13, 2012 at 06:19:10PM +0800, Liu Bo wrote:
> we use larger extent state range for both readpages and read endio, so that
> we can lock or unlock less and avoid most of split ops, then we'll reduce write
> locks taken at endio time.
>
> Signed-off-by: Liu Bo <liubo2009@xxxxxxxxxxxxxx>
> ---
> fs/btrfs/extent_io.c | 201 +++++++++++++++++++++++++++++++++++++++++++++-----
> 1 files changed, 182 insertions(+), 19 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 081fe13..bb66e3c 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2258,18 +2258,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
> struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
> struct bio_vec *bvec = bio->bi_io_vec;
> struct extent_io_tree *tree;
> + struct extent_state *cached = NULL;
> u64 start;
> u64 end;
> int whole_page;
> int mirror;
> int ret;
> + u64 up_start, up_end, un_start, un_end;
> + int up_first, un_first;
> + int for_uptodate[bio->bi_vcnt];
The array size depends on an argument; the compiler will handle it by
dynamically expanding the stack frame. What is the expected maximum value
for bi_vcnt? There's no hard limit AFAICS — the value gets incremented
for each page in the bio. If the function is called from the worker
thread, it's not much of a problem even for values like 128.
An alternative way to avoid over-limit stack consumption is to declare an
array that holds a fair number of items (e.g. 16), and fall back to kmalloc
otherwise.
> + int i = 0;
> +
> + up_start = un_start = (u64)-1;
> + up_end = un_end = 0;
> + up_first = un_first = 1;
>
> if (err)
> uptodate = 0;
>
> do {
> struct page *page = bvec->bv_page;
> - struct extent_state *cached = NULL;
>
> pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
> "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
> @@ -2352,7 +2412,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
> }
> unlock_page(page);
> } else {
> - if (uptodate) {
> + if (for_uptodate[i++]) {
> check_page_uptodate(tree, page);
> } else {
> clearpageuptodate(page);
> @@ -2360,6 +2420,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
> }
> check_page_locked(tree, page);
> }
> + ++bvec;
Can you please move the i++ increment here? From reading the code, it's
not clear at first sight whether the side effects are OK.
> } while (bvec <= bvec_end);
>
> bio_put(bio);
> @@ -2554,6 +2615,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
>
> end = page_end;
> while (1) {
> + if (range_lock)
> + break;
With range_lock set, this is equivalent to calling
2896 set_page_extent_mapped(page);
(plus the cleancache code) a few lines above. Is this intended? It's
actually called with '1' from only a single place, process_batch_pages().
> lock_extent(tree, start, end);
> ordered = btrfs_lookup_ordered_extent(inode, start);
> if (!ordered)
> @@ -3497,6 +3560,59 @@ int extent_writepages(struct extent_io_tree *tree,
> return ret;
> }
>
> +struct page_list {
> + struct page *page;
> + struct list_head list;
> +};
> +
> +static int process_batch_pages(struct extent_io_tree *tree,
> + struct address_space *mapping,
> + struct list_head *lock_pages, int *page_cnt,
> + u64 lock_start, u64 lock_end,
> + get_extent_t get_extent, struct bio **bio,
> + unsigned long *bio_flags)
> +{
> + u64 page_start;
> + struct page_list *plist;
> +
> + while (1) {
> + struct btrfs_ordered_extent *ordered = NULL;
> +
> + lock_extent(tree, lock_start, lock_end);
> + page_start = lock_start;
> + while (page_start < lock_end) {
> + ordered = btrfs_lookup_ordered_extent(mapping->host,
> + page_start);
> + if (ordered) {
> + page_start = ordered->file_offset;
> + break;
> + }
> + page_start += PAGE_CACHE_SIZE;
> + }
> + if (!ordered)
> + break;
You're leaving the range [lock_start, lock_end) locked after taking this
branch. Intended?
> + unlock_extent(tree, lock_start, lock_end);
> + btrfs_start_ordered_extent(mapping->host, ordered, 1);
> + btrfs_put_ordered_extent(ordered);
> + }
> +
> + plist = NULL;
> + while (!list_empty(lock_pages)) {
> + plist = list_entry(lock_pages->prev, struct page_list, list);
> +
> + __extent_read_full_page(tree, plist->page, get_extent,
> + bio, 0, bio_flags, 1);
> + page_cache_release(plist->page);
> + list_del(&plist->list);
> + plist->page = NULL;
You can drop this assignment.
> + kfree(plist);
> + (*page_cnt)--;
> + }
> +
> + WARN_ON((*page_cnt));
> + return 0;
> +}
> +
> int extent_readpages(struct extent_io_tree *tree,
> struct address_space *mapping,
> struct list_head *pages, unsigned nr_pages,
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html