Hi,
I have non-qgroup related comments:
On Wed, Jun 15, 2016 at 03:50:02PM -0700, Mark Fasheh wrote:
> --- a/qgroup-verify.c
> +++ b/qgroup-verify.c
> @@ -35,7 +35,8 @@
> /*#define QGROUP_VERIFY_DEBUG*/
> static unsigned long tot_extents_scanned = 0;
>
> -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
> +struct qgroup_count;
> +static struct qgroup_count *find_count(u64 qgroupid);
>
> struct qgroup_info {
> u64 referenced;
> @@ -54,6 +55,14 @@ struct qgroup_count {
> struct qgroup_info info;
>
> struct rb_node rb_node;
> +
> + struct list_head groups; /* parents when we are a child group */
> + struct list_head members; /* children when we are a parent
> + group (not currently used but
> + maintained to mirror kernel
> + handling of qgroups) */
A long comment would be better on the line above the declaration.
> +
> + u64 cur_refcnt;
> };
>
> static struct counts_tree {
> @@ -66,6 +75,39 @@ static struct counts_tree {
> static struct rb_root by_bytenr = RB_ROOT;
>
> /*
> + * glue structure to represent the relations between qgroups. Mirrored
"Glue structure ..." ie. capital letter if it's a sentence, there's more
of these.
> + * from kernel.
> + */
> +struct btrfs_qgroup_list {
> + struct list_head next_group;
> + struct list_head next_member;
> + struct qgroup_count *group; /* Parent group */
> + struct qgroup_count *member;
> +};
> +
> +/* allow us to reset ref counts during accounting without zeroing each group */
> +static u64 qgroup_seq = 1ULL;
> +
> +static inline void update_cur_refcnt(struct qgroup_count *c)
> +{
> + if (c->cur_refcnt < qgroup_seq)
> + c->cur_refcnt = qgroup_seq;
> + c->cur_refcnt += 1;
c->cur_refcnt++;
> +}
> +
> +static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
> +{
> + if (c->cur_refcnt < qgroup_seq)
> + return 0;
> + return c->cur_refcnt - qgroup_seq;
> +}
> +
> +static void inc_qgroup_seq(int root_count)
> +{
> + qgroup_seq += root_count + 1;
> +}
> +
> +/*
> * List of interior tree blocks. We walk this list after loading the
> * extent tree to resolve implied refs. For each interior node we'll
> * place a shared ref in the ref tree against each child object. This
> @@ -296,9 +338,10 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
> }
>
> do {
> - if (ref->root)
> - ulist_add(roots, ref->root, 0, 0);
> - else
> + if (ref->root) {
> + if (is_fstree(ref->root))
> + ulist_add(roots, ref->root, 0, 0);
> + } else
} else {
> find_parent_roots(roots, ref->parent);
}
>
> node = rb_next(node);
> @@ -307,6 +350,116 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
> } while (node && ref->bytenr == parent);
> }
>
> +static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
> +{
> + int ret;
> + u64 id, nr_roots, nr_refs;
> + struct qgroup_count *count;
> + struct ulist *counts = ulist_alloc(0);
> + struct ulist *tmp = ulist_alloc(0);
> + struct ulist_iterator uiter;
> + struct ulist_iterator tmp_uiter;
> + struct ulist_node *unode;
> + struct ulist_node *tmp_unode;
> + struct btrfs_qgroup_list *glist;
> +
> + if (!counts || !tmp) {
> + ulist_free(counts);
> + ulist_free(tmp);
> + return ENOMEM;
For a separate patch, but negative error values are preferred.
> + }
> +
> + ULIST_ITER_INIT(&uiter);
> + while ((unode = ulist_next(roots, &uiter))) {
> + BUG_ON(unode->val == 0ULL);
> +
> + /*
> + * For each root, find their corresponding tracking group and
> + * add it to our qgroups list.
> + */
> + count = find_count(unode->val);
> + if (!count)
> + continue;
> +
> + BUG_ON(!is_fstree(unode->val));
> + ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
> + if (ret < 0)
> + goto out;
> +
> + /*
> + * Now we look for parents (and parents of
> + * those...). Use a tmp ulist here to avoid re-walking
> + * (and re-incrementing) our already added items on every
> + * loop iteration.
Please reformat to 80 columns.
> + */
> + ulist_reinit(tmp);
> + ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
> + if (ret < 0)
> + goto out;
> +
> + ULIST_ITER_INIT(&tmp_uiter);
> + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
> + /* bump the refcount on a node every time we
> + * see it. */
/*
* Text...
*/
> + count = u64_to_ptr(tmp_unode->aux);
> + update_cur_refcnt(count);
> +
> + list_for_each_entry(glist, &count->groups, next_group) {
> + struct qgroup_count *parent;
> + parent = glist->group;
> + id = parent->qgroupid;
> +
> + BUG_ON(!count);
> +
> + ret = ulist_add(counts, id, ptr_to_u64(parent),
> + 0);
> + if (ret < 0)
> + goto out;
> + ret = ulist_add(tmp, id, ptr_to_u64(parent),
> + 0);
> + if (ret < 0)
> + goto out;
> + }
> + }
> + }
> +
> + /*
> + * Now that we have gathered up and counted all the groups, we
> + * can add bytes for this ref.
> + */
> + nr_roots = roots->nnodes;
> + ULIST_ITER_INIT(&uiter);
> + while ((unode = ulist_next(counts, &uiter))) {
> + count = u64_to_ptr(unode->aux);
> +
> + nr_refs = group_get_cur_refcnt(count);
> + if (nr_refs) {
> + count->info.referenced += num_bytes;
> + count->info.referenced_compressed += num_bytes;
> +
> + if (nr_refs == nr_roots) {
> + count->info.exclusive += num_bytes;
> + count->info.exclusive_compressed += num_bytes;
> + }
> + }
> +#ifdef QGROUP_VERIFY_DEBUG
> + printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
> + " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
> + btrfs_qgroup_level(count->qgroupid),
> + btrfs_qgroup_subvid(count->qgroupid),
> + count->info.referenced, count->info.exclusive, nr_refs,
> + nr_roots);
> +#endif
> + }
> +
> + inc_qgroup_seq(roots->nnodes);
> + ret = 0;
> +out:
> + ulist_free(counts);
> + ulist_free(tmp);
> + return ret;
> +}
> +
> static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
> struct ulist *roots);
> /*
> @@ -318,18 +471,15 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
> * - resolve all possible roots for shared refs, insert each
> * of those into ref_roots ulist (this is a recursive process)
> *
> - * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
> - * cooresponds to a found root.
> + * - With all roots resolved we can account the ref - this is done in
> + * account_one_extent().
> */
> static void account_all_refs(int do_qgroups, u64 search_subvol)
> {
> - int exclusive;
> struct ref *ref;
> struct rb_node *node;
> u64 bytenr, num_bytes;
> struct ulist *roots = ulist_alloc(0);
> - struct ulist_iterator uiter;
> - struct ulist_node *unode;
>
> node = rb_first(&by_bytenr);
> while (node) {
> @@ -347,10 +497,14 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
> do {
> BUG_ON(ref->bytenr != bytenr);
> BUG_ON(ref->num_bytes != num_bytes);
> - if (ref->root)
> - ulist_add(roots, ref->root, 0, 0);
> - else
> + if (ref->root) {
> + if (is_fstree(ref->root)) {
> + if (ulist_add(roots, ref->root, 0, 0) < 0)
> + goto enomem;
> + }
> + } else {
> find_parent_roots(roots, ref->parent);
> + }
>
> /*
> * When we leave this inner loop, node is set
> @@ -362,29 +516,23 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
> ref = rb_entry(node, struct ref, bytenr_node);
> } while (node && ref->bytenr == bytenr);
>
> - /*
> - * Now that we have all roots, we can properly account
> - * this extent against the corresponding qgroups.
> - */
> - if (roots->nnodes == 1)
> - exclusive = 1;
> - else
> - exclusive = 0;
> -
> if (search_subvol)
> print_subvol_info(search_subvol, bytenr, num_bytes,
> roots);
>
> - ULIST_ITER_INIT(&uiter);
> - while ((unode = ulist_next(roots, &uiter))) {
> - BUG_ON(unode->val == 0ULL);
> - /* We only want to account fs trees */
> - if (is_fstree(unode->val) && do_qgroups)
> - add_bytes(unode->val, num_bytes, exclusive);
> - }
> + if (!do_qgroups)
> + continue;
> +
> + if (account_one_extent(roots, bytenr, num_bytes))
> + goto enomem;
> }
>
> ulist_free(roots);
> + return;
> +enomem:
> + fprintf(stderr,
> + "ERROR: Out of memory while accounting refs for qgroups!\n");
> + abort();
I don't like this kind of error handling. Fail with a message and return
error to the caller chain.
> }
>
> static u64 resolve_one_root(u64 bytenr)
> @@ -668,6 +816,8 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
> item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
> item->exclusive_compressed =
> btrfs_qgroup_info_exclusive_compressed(leaf, disk);
> + INIT_LIST_HEAD(&c->groups);
> + INIT_LIST_HEAD(&c->members);
>
> if (insert_count(c)) {
> free(c);
> @@ -677,29 +827,30 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
> return c;
> }
>
> -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
> +static int add_qgroup_relation(u64 memberid, u64 parentid)
> {
> - struct qgroup_count *count = find_count(root_objectid);
> - struct qgroup_info *qg;
> + struct qgroup_count *member;
> + struct qgroup_count *parent;
> + struct btrfs_qgroup_list *list;
>
> - BUG_ON(num_bytes < 4096); /* Random sanity check. */
> + if (memberid > parentid)
> + return 0;
>
> - if (!count)
> - return;
> + member = find_count(memberid);
> + parent = find_count(parentid);
> + if (!member || !parent)
> + return -ENOENT;
>
> - qg = &count->info;
> + list = calloc(1, sizeof(*list));
> + if (!list)
> + return -ENOMEM;
>
> - qg->referenced += num_bytes;
> - /*
> - * count of compressed bytes is unimplemented, so we do the
> - * same as kernel.
> - */
> - qg->referenced_compressed += num_bytes;
> + list->group = parent;
> + list->member = member;
> + list_add_tail(&list->next_group, &member->groups);
> + list_add_tail(&list->next_member, &parent->members);
>
> - if (exclusive) {
> - qg->exclusive += num_bytes;
> - qg->exclusive_compressed += num_bytes;
> - }
> + return 0;
> }
>
> static void read_qgroup_status(struct btrfs_path *path,
> @@ -728,11 +879,18 @@ static int load_quota_info(struct btrfs_fs_info *info)
> struct btrfs_qgroup_info_item *item;
> struct qgroup_count *count;
> int i, nr;
> + int search_relations = 0;
>
> +loop:
> + /*
> + * Do 2 passes, the first allocates group counts and reads status
> + * items. The 2nd pass picks up relation items and glues them
> + * to their respective count structures.
> + */
> btrfs_init_path(&path);
>
> key.offset = 0;
> - key.objectid = 0;
> + key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
> key.type = 0;
>
> ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> @@ -749,17 +907,27 @@ static int load_quota_info(struct btrfs_fs_info *info)
> btrfs_item_key(leaf, &disk_key, i);
> btrfs_disk_key_to_cpu(&key, &disk_key);
>
> + if (search_relations) {
> + if (key.type == BTRFS_QGROUP_RELATION_KEY) {
> + ret = add_qgroup_relation(key.objectid,
> + key.offset);
> + if (ret) {
> + fprintf(stderr,
> + "ERROR: out of memory\n");
Please use the error("...") helper.
> + goto out;
> + }
> + }
> + continue;
> + }
> +
> if (key.type == BTRFS_QGROUP_STATUS_KEY) {
> read_qgroup_status(&path, &counts);
> continue;
> }
> - if (key.type == BTRFS_QGROUP_RELATION_KEY)
> - printf("Ignoring qgroup relation key %llu\n",
> - key.objectid);
>
> /*
> - * Ignore: BTRFS_QGROUP_LIMIT_KEY,
> - * BTRFS_QGROUP_RELATION_KEY
> + * At this point, we can ignore anything that
> + * isn't a qgroup info.
> */
> if (key.type != BTRFS_QGROUP_INFO_KEY)
> continue;
> @@ -791,6 +959,12 @@ static int load_quota_info(struct btrfs_fs_info *info)
>
> ret = 0;
> btrfs_release_path(&path);
> +
> + if (!search_relations) {
> + search_relations = 1;
> + goto loop;
> + }
> +
> out:
> return ret;
> }
> @@ -1035,6 +1209,11 @@ static void print_fields_signed(long long bytes,
> prefix, type, bytes, type, bytes_compressed);
> }
>
> +static inline int qgroup_printable(struct qgroup_count *c)
> +{
> + return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
> +}
> +
> static int report_qgroup_difference(struct qgroup_count *count, int verbose)
> {
> int is_different;
> @@ -1045,9 +1224,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose)
>
> is_different = excl_diff || ref_diff;
>
> - if (verbose || (is_different && count->subvol_exists)) {
> - printf("Counts for qgroup id: %llu %s\n",
> - (unsigned long long)count->qgroupid,
> + if (verbose || (is_different && qgroup_printable(count))) {
> + printf("Counts for qgroup id: %llu/%llu %s\n",
> + btrfs_qgroup_level(count->qgroupid),
> + btrfs_qgroup_subvid(count->qgroupid),
> is_different ? "are different" : "");
>
> print_fields(info->referenced, info->referenced_compressed,
> @@ -1099,10 +1279,27 @@ void free_qgroup_counts(void)
> {
> struct rb_node *node;
> struct qgroup_count *c;
> + struct btrfs_qgroup_list *glist, *tmpglist;
> +
> node = rb_first(&counts.root);
> while (node) {
> c = rb_entry(node, struct qgroup_count, rb_node);
> +
> + list_for_each_entry_safe(glist, tmpglist, &c->groups,
> + next_group) {
> + list_del(&glist->next_group);
> + list_del(&glist->next_member);
> + free(glist);
> + }
> + list_for_each_entry_safe(glist, tmpglist, &c->members,
> + next_group) {
> + list_del(&glist->next_group);
> + list_del(&glist->next_member);
> + free(glist);
> + }
> +
> node = rb_next(node);
> +
> rb_erase(&c->rb_node, &counts.root);
> free(c);
> }
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html