Hello Jan,
> slot = path->slots[0];
> ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
> + spin_lock(&fs_info->qgroup_lock);
Why do we need to hold qgroup_lock here? Would you please explain?
Thanks,
Wang
> btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
> btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
> - /* XXX scan */
> + btrfs_set_qgroup_status_rescan(l, ptr,
> + fs_info->qgroup_rescan_progress.objectid);
> + spin_unlock(&fs_info->qgroup_lock);
>
> btrfs_mark_buffer_dirty(l);
>
> @@ -830,7 +854,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
> fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
> BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
> - btrfs_set_qgroup_status_scan(leaf, ptr, 0);
> + btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
>
> btrfs_mark_buffer_dirty(leaf);
>
> @@ -894,10 +918,11 @@ out:
> return ret;
> }
>
> -int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
> +static void qgroup_dirty(struct btrfs_fs_info *fs_info,
> + struct btrfs_qgroup *qgroup)
> {
> - /* FIXME */
> - return 0;
> + if (list_empty(&qgroup->dirty))
> + list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
> }
>
> int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
> @@ -1045,13 +1070,6 @@ unlock:
> return ret;
> }
>
> -static void qgroup_dirty(struct btrfs_fs_info *fs_info,
> - struct btrfs_qgroup *qgroup)
> -{
> - if (list_empty(&qgroup->dirty))
> - list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
> -}
> -
> /*
> * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
> * the modification into a list that's later used by btrfs_end_transaction to
> @@ -1256,6 +1274,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
> BUG();
> }
>
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> + if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> + return 0;
> + }
> + }
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> /*
> * the delayed ref sequence number we pass depends on the direction of
> * the operation. for add operations, we pass (node->seq - 1) to skip
> @@ -1269,7 +1296,17 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
> if (ret < 0)
> return ret;
>
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> spin_lock(&fs_info->qgroup_lock);
> + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
> + if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
> + ret = 0;
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> + goto unlock;
> + }
> + }
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> quota_root = fs_info->quota_root;
> if (!quota_root)
> goto unlock;
> @@ -1652,3 +1689,233 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
> trans->delayed_ref_elem.seq);
> BUG();
> }
> +
> +/*
> + * returns < 0 on error, 0 when more leafs are to be scanned.
> + * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
> + */
> +static int
> +qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
> + struct btrfs_trans_handle *trans, struct ulist *tmp,
> + struct extent_buffer *scratch_leaf)
> +{
> + struct btrfs_key found;
> + struct btrfs_fs_info *fs_info = qscan->fs_info;
> + struct ulist *roots = NULL;
> + struct ulist_node *unode;
> + struct ulist_iterator uiter;
> + struct seq_list tree_mod_seq_elem = {};
> + u64 seq;
> + int slot;
> + int ret;
> +
> + path->leave_spinning = 1;
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> + ret = btrfs_search_slot_for_read(fs_info->extent_root,
> + &fs_info->qgroup_rescan_progress,
> + path, 1, 0);
> +
> + pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
> + (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
> + fs_info->qgroup_rescan_progress.type,
> + (unsigned long long)fs_info->qgroup_rescan_progress.offset,
> + ret);
> +
> + if (ret) {
> + /*
> + * The rescan is about to end, we will not be scanning any
> + * further blocks. We cannot unset the RESCAN flag here, because
> + * we want to commit the transaction if everything went well.
> + * To make the live accounting work in this phase, we set our
> + * scan progress pointer such that every real extent objectid
> + * will be smaller.
> + */
> + fs_info->qgroup_rescan_progress.objectid = (u64)-1;
> + btrfs_release_path(path);
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> + return ret;
> + }
> +
> + btrfs_item_key_to_cpu(path->nodes[0], &found,
> + btrfs_header_nritems(path->nodes[0]) - 1);
> + fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
> +
> + btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> + memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
> + slot = path->slots[0];
> + btrfs_release_path(path);
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> + for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
> + btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
> + if (found.type != BTRFS_EXTENT_ITEM_KEY)
> + continue;
> + ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
> + tree_mod_seq_elem.seq, &roots);
> + if (ret < 0)
> + break;
> + spin_lock(&fs_info->qgroup_lock);
> + seq = fs_info->qgroup_seq;
> + fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
> +
> + ulist_reinit(tmp);
> + ULIST_ITER_INIT(&uiter);
> + while ((unode = ulist_next(roots, &uiter))) {
> + struct btrfs_qgroup *qg;
> +
> + qg = find_qgroup_rb(fs_info, unode->val);
> + if (!qg)
> + continue;
> +
> + ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
> + }
> +
> + /* this is similar to step 2 of btrfs_qgroup_account_ref */
> + ULIST_ITER_INIT(&uiter);
> + while ((unode = ulist_next(tmp, &uiter))) {
> + struct btrfs_qgroup *qg;
> + struct btrfs_qgroup_list *glist;
> +
> + qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
> + qg->rfer += found.offset;
> + qg->rfer_cmpr += found.offset;
> + WARN_ON(qg->tag >= seq);
> + WARN_ON(qg->refcnt >= seq);
> + if (qg->refcnt < seq)
> + qg->refcnt = seq + 1;
> + else
> + qg->refcnt = qg->refcnt + 1;
> + qgroup_dirty(fs_info, qg);
> +
> + list_for_each_entry(glist, &qg->groups, next_group) {
> + ulist_add(tmp, glist->group->qgroupid,
> + (uintptr_t)glist->group,
> + GFP_ATOMIC);
> + }
> + }
> +
> + qgroup_account_ref_step3(fs_info, roots, tmp, seq, -1,
> + found.offset);
> +
> + spin_unlock(&fs_info->qgroup_lock);
> + ulist_free(roots);
> + }
> +
> + btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
> +
> + return ret;
> +}
> +
> +static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
> +{
> + struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
> + work);
> + struct btrfs_path *path;
> + struct btrfs_trans_handle *trans = NULL;
> + struct btrfs_fs_info *fs_info = qscan->fs_info;
> + struct ulist *tmp = NULL;
> + struct extent_buffer *scratch_leaf = NULL;
> + int err = -ENOMEM;
> +
> + path = btrfs_alloc_path();
> + if (!path)
> + goto out;
> + tmp = ulist_alloc(GFP_NOFS);
> + if (!tmp)
> + goto out;
> + scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
> + if (!scratch_leaf)
> + goto out;
> +
> + err = 0;
> + while (!err) {
> + trans = btrfs_start_transaction(fs_info->fs_root, 0);
> + if (IS_ERR(trans)) {
> + err = PTR_ERR(trans);
> + break;
> + }
> + err = qgroup_rescan_leaf(qscan, path, trans, tmp, scratch_leaf);
> + if (err > 0)
> + btrfs_commit_transaction(trans, fs_info->fs_root);
> + else
> + btrfs_end_transaction(trans, fs_info->fs_root);
> + }
> +
> +out:
> + kfree(scratch_leaf);
> + ulist_free(tmp);
> + btrfs_free_path(path);
> + kfree(qscan);
> +
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> +
> + if (err == 2 &&
> + fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
> + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> + } else if (err < 0) {
> + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
> + }
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> + if (err >= 0) {
> + pr_info("btrfs: qgroup scan completed%s\n",
> + err == 2 ? " (inconsistency flag cleared)" : "");
> + } else {
> + pr_err("btrfs: qgroup scan failed with %d\n", err);
> + }
> +}
> +
> +static void
> +qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
> +{
> + qscan->work.func = btrfs_qgroup_rescan_worker;
> + qscan->fs_info = fs_info;
> +
> + pr_info("btrfs: qgroup scan started\n");
> + btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
> +}
> +
> +int
> +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
> +{
> + int ret = 0;
> + struct rb_node *n;
> + struct btrfs_qgroup *qgroup;
> + struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
> +
> + if (!qscan)
> + return -ENOMEM;
> +
> + mutex_lock(&fs_info->qgroup_rescan_lock);
> + spin_lock(&fs_info->qgroup_lock);
> + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
> + ret = -EINPROGRESS;
> + else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
> + ret = -EINVAL;
> + if (ret) {
> + spin_unlock(&fs_info->qgroup_lock);
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> + kfree(qscan);
> + return ret;
> + }
> +
> + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
> + memset(&fs_info->qgroup_rescan_progress, 0,
> + sizeof(fs_info->qgroup_rescan_progress));
> +
> + /* clear all current qgroup tracking information */
> + for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
> + qgroup = rb_entry(n, struct btrfs_qgroup, node);
> + qgroup->rfer = 0;
> + qgroup->rfer_cmpr = 0;
> + qgroup->excl = 0;
> + qgroup->excl_cmpr = 0;
> + }
> + spin_unlock(&fs_info->qgroup_lock);
> + mutex_unlock(&fs_info->qgroup_rescan_lock);
> +
> + qgroup_rescan_start(fs_info, qscan);
> +
> + return 0;
> +}
> diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
> index fa3a5f9..ca70f08 100644
> --- a/include/uapi/linux/btrfs.h
> +++ b/include/uapi/linux/btrfs.h
> @@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
>
> #define BTRFS_QUOTA_CTL_ENABLE 1
> #define BTRFS_QUOTA_CTL_DISABLE 2
> -#define BTRFS_QUOTA_CTL_RESCAN 3
> +#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3
> struct btrfs_ioctl_quota_ctl_args {
> __u64 cmd;
> __u64 status;
> };
>
> +struct btrfs_ioctl_quota_rescan_args {
> + __u64 flags;
> + __u64 progress;
> + __u64 reserved[6];
> +};
> +
> struct btrfs_ioctl_qgroup_assign_args {
> __u64 assign;
> __u64 src;
> @@ -502,6 +508,10 @@ struct btrfs_ioctl_send_args {
> struct btrfs_ioctl_qgroup_create_args)
> #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
> struct btrfs_ioctl_qgroup_limit_args)
> +#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
> + struct btrfs_ioctl_quota_rescan_args)
> +#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
> + struct btrfs_ioctl_quota_rescan_args)
> #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
> char[BTRFS_LABEL_SIZE])
> #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html