Re: [PATCH 2/2] btrfs-progs: btrfsck: verify qgroups above level 0

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I have non-qgroup related comments:

On Wed, Jun 15, 2016 at 03:50:02PM -0700, Mark Fasheh wrote:
> --- a/qgroup-verify.c
> +++ b/qgroup-verify.c
> @@ -35,7 +35,8 @@
>  /*#define QGROUP_VERIFY_DEBUG*/
>  static unsigned long tot_extents_scanned = 0;
>  
> -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
> +struct qgroup_count;
> +static struct qgroup_count *find_count(u64 qgroupid);
>  
>  struct qgroup_info {
>  	u64 referenced;
> @@ -54,6 +55,14 @@ struct qgroup_count {
>  	struct qgroup_info info;
>  
>  	struct rb_node rb_node;
> +
> +	struct list_head groups;  /* parents when we are a child group */
> +	struct list_head members; /* children when we are a parent
> +				     group (not currently used but
> +				     maintained to mirror kernel
> +				     handling of qgroups) */

A long comment would be better on the line above the declaration.

> +
> +	u64 cur_refcnt;
>  };
>  
>  static struct counts_tree {
> @@ -66,6 +75,39 @@ static struct counts_tree {
>  static struct rb_root by_bytenr = RB_ROOT;
>  
>  /*
> + * glue structure to represent the relations between qgroups. Mirrored

"Glue structure ..." ie. capital letter if it's a sentence, there's more
of these.

> + * from kernel.
> + */
> +struct btrfs_qgroup_list {
> +	struct list_head next_group;
> +	struct list_head next_member;
> +	struct qgroup_count *group; /* Parent group */
> +	struct qgroup_count *member;
> +};
> +
> +/* allow us to reset ref counts during accounting without zeroing each group */
> +static u64 qgroup_seq = 1ULL;
> +
> +static inline void update_cur_refcnt(struct qgroup_count *c)
> +{
> +	if (c->cur_refcnt < qgroup_seq)
> +		c->cur_refcnt = qgroup_seq;
> +	c->cur_refcnt += 1;

	c->cur_refcnt++;

> +}
> +
> +static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
> +{
> +	if (c->cur_refcnt < qgroup_seq)
> +		return 0;
> +	return c->cur_refcnt - qgroup_seq;
> +}
> +
> +static void inc_qgroup_seq(int root_count)
> +{
> +	qgroup_seq += root_count + 1;
> +}
> +
> +/*
>   * List of interior tree blocks. We walk this list after loading the
>   * extent tree to resolve implied refs. For each interior node we'll
>   * place a shared ref in the ref tree against each child object. This
> @@ -296,9 +338,10 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
>  	}
>  
>  	do {
> -		if (ref->root)
> -			ulist_add(roots, ref->root, 0, 0);
> -		else
> +		if (ref->root) {
> +			if (is_fstree(ref->root))
> +				ulist_add(roots, ref->root, 0, 0);
> +		} else

		} else {

>  			find_parent_roots(roots, ref->parent);

		}

>  
>  		node = rb_next(node);
> @@ -307,6 +350,116 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
>  	} while (node && ref->bytenr == parent);
>  }
>  
> +static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
> +{
> +	int ret;
> +	u64 id, nr_roots, nr_refs;
> +	struct qgroup_count *count;
> +	struct ulist *counts = ulist_alloc(0);
> +	struct ulist *tmp = ulist_alloc(0);
> +	struct ulist_iterator uiter;
> +	struct ulist_iterator tmp_uiter;
> +	struct ulist_node *unode;
> +	struct ulist_node *tmp_unode;
> +	struct btrfs_qgroup_list *glist;
> +
> +	if (!counts || !tmp) {
> +		ulist_free(counts);
> +		ulist_free(tmp);
> +		return ENOMEM;

For a separate patch, but negative error values are preferred.

> +	}
> +
> +	ULIST_ITER_INIT(&uiter);
> +	while ((unode = ulist_next(roots, &uiter))) {
> +		BUG_ON(unode->val == 0ULL);
> +
> +		/*
> +		 * For each root, find their corresponding tracking group and
> +		 * add it to our qgroups list.
> +		 */
> +		count = find_count(unode->val);
> +		if (!count)
> +			continue;
> +
> +		BUG_ON(!is_fstree(unode->val));
> +		ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
> +		if (ret < 0)
> +			goto out;
> +
> +		/*
> +		 * Now we look for parents (and parents of
> +		 * those...). Use a tmp ulist here to avoid re-walking
> +		 * (and re-incrementing) our already added items on every
> +		 * loop iteration.

Please reformat to 80 columns.

> +		 */
> +		ulist_reinit(tmp);
> +		ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
> +		if (ret < 0)
> +			goto out;
> +
> +		ULIST_ITER_INIT(&tmp_uiter);
> +		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
> +			/* bump the refcount on a node every time we
> +			 * see it. */

/*
 * Text...
 */

> +			count = u64_to_ptr(tmp_unode->aux);
> +			update_cur_refcnt(count);
> +
> +			list_for_each_entry(glist, &count->groups, next_group) {
> +				struct qgroup_count *parent;
> +				parent = glist->group;
> +				id = parent->qgroupid;
> +
> +				BUG_ON(!count);
> +
> +				ret = ulist_add(counts, id, ptr_to_u64(parent),
> +						0);
> +				if (ret < 0)
> +					goto out;
> +				ret = ulist_add(tmp, id, ptr_to_u64(parent),
> +						0);
> +				if (ret < 0)
> +					goto out;
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * Now that we have gathered up and counted all the groups, we
> +	 * can add bytes for this ref.
> +	 */
> +	nr_roots = roots->nnodes;
> +	ULIST_ITER_INIT(&uiter);
> +	while ((unode = ulist_next(counts, &uiter))) {
> +		count = u64_to_ptr(unode->aux);
> +
> +		nr_refs = group_get_cur_refcnt(count);
> +		if (nr_refs) {
> +			count->info.referenced += num_bytes;
> +			count->info.referenced_compressed += num_bytes;
> +
> +			if (nr_refs == nr_roots) {
> +				count->info.exclusive += num_bytes;
> +				count->info.exclusive_compressed += num_bytes;
> +			}
> +		}
> +#ifdef QGROUP_VERIFY_DEBUG
> +		printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
> +		       " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
> +		       btrfs_qgroup_level(count->qgroupid),
> +		       btrfs_qgroup_subvid(count->qgroupid),
> +		       count->info.referenced, count->info.exclusive, nr_refs,
> +		       nr_roots);
> +#endif
> +	}
> +
> +	inc_qgroup_seq(roots->nnodes);
> +	ret = 0;
> +out:
> +	ulist_free(counts);
> +	ulist_free(tmp);
> +	return ret;
> +}
> +
>  static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
>  			      struct ulist *roots);
>  /*
> @@ -318,18 +471,15 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
>   * - resolve all possible roots for shared refs, insert each
>   *   of those into ref_roots ulist (this is a recursive process)
>   *
> - * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
> - *    cooresponds to a found root.
> + * - With all roots resolved we can account the ref - this is done in
> + *   account_one_extent().
>   */
>  static void account_all_refs(int do_qgroups, u64 search_subvol)
>  {
> -	int exclusive;
>  	struct ref *ref;
>  	struct rb_node *node;
>  	u64 bytenr, num_bytes;
>  	struct ulist *roots = ulist_alloc(0);
> -	struct ulist_iterator uiter;
> -	struct ulist_node *unode;
>  
>  	node = rb_first(&by_bytenr);
>  	while (node) {
> @@ -347,10 +497,14 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
>  		do {
>  			BUG_ON(ref->bytenr != bytenr);
>  			BUG_ON(ref->num_bytes != num_bytes);
> -			if (ref->root)
> -				ulist_add(roots, ref->root, 0, 0);
> -			else
> +			if (ref->root) {
> +				if (is_fstree(ref->root)) {
> +					if (ulist_add(roots, ref->root, 0, 0) < 0)
> +						goto enomem;
> +				}
> +			} else {
>  				find_parent_roots(roots, ref->parent);
> +			}
>  
>  			/*
>  			 * When we leave this inner loop, node is set
> @@ -362,29 +516,23 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
>  				ref = rb_entry(node, struct ref, bytenr_node);
>  		} while (node && ref->bytenr == bytenr);
>  
> -		/*
> -		 * Now that we have all roots, we can properly account
> -		 * this extent against the corresponding qgroups.
> -		 */
> -		if (roots->nnodes == 1)
> -			exclusive = 1;
> -		else
> -			exclusive = 0;
> -
>  		if (search_subvol)
>  			print_subvol_info(search_subvol, bytenr, num_bytes,
>  					  roots);
>  
> -		ULIST_ITER_INIT(&uiter);
> -		while ((unode = ulist_next(roots, &uiter))) {
> -			BUG_ON(unode->val == 0ULL);
> -			/* We only want to account fs trees */
> -			if (is_fstree(unode->val) && do_qgroups)
> -				add_bytes(unode->val, num_bytes, exclusive);
> -		}
> +		if (!do_qgroups)
> +			continue;
> +
> +		if (account_one_extent(roots, bytenr, num_bytes))
> +			goto enomem;
>  	}
>  
>  	ulist_free(roots);
> +	return;
> +enomem:
> +	fprintf(stderr,
> +		"ERROR: Out of memory while accounting refs for qgroups!\n");
> +	abort();

I don't like this kind of error handling. Fail with a message and return
error to the caller chain.

>  }
>  
>  static u64 resolve_one_root(u64 bytenr)
> @@ -668,6 +816,8 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
>  		item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
>  		item->exclusive_compressed =
>  			btrfs_qgroup_info_exclusive_compressed(leaf, disk);
> +		INIT_LIST_HEAD(&c->groups);
> +		INIT_LIST_HEAD(&c->members);
>  
>  		if (insert_count(c)) {
>  			free(c);
> @@ -677,29 +827,30 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
>  	return c;
>  }
>  
> -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
> +static int add_qgroup_relation(u64 memberid, u64 parentid)
>  {
> -	struct qgroup_count *count = find_count(root_objectid);
> -	struct qgroup_info *qg;
> +	struct qgroup_count *member;
> +	struct qgroup_count *parent;
> +	struct btrfs_qgroup_list *list;
>  
> -	BUG_ON(num_bytes < 4096); /* Random sanity check. */
> +	if (memberid > parentid)
> +		return 0;
>  
> -	if (!count)
> -		return;
> +	member = find_count(memberid);
> +	parent = find_count(parentid);
> +	if (!member || !parent)
> +		return -ENOENT;
>  
> -	qg = &count->info;
> +	list = calloc(1, sizeof(*list));
> +	if (!list)
> +		return -ENOMEM;
>  
> -	qg->referenced += num_bytes;
> -	/*
> -	 * count of compressed bytes is unimplemented, so we do the
> -	 * same as kernel.
> -	 */
> -	qg->referenced_compressed += num_bytes;
> +	list->group = parent;
> +	list->member = member;
> +	list_add_tail(&list->next_group, &member->groups);
> +	list_add_tail(&list->next_member, &parent->members);
>  
> -	if (exclusive) {
> -		qg->exclusive += num_bytes;
> -		qg->exclusive_compressed += num_bytes;
> -	}
> +	return 0;
>  }
>  
>  static void read_qgroup_status(struct btrfs_path *path,
> @@ -728,11 +879,18 @@ static int load_quota_info(struct btrfs_fs_info *info)
>  	struct btrfs_qgroup_info_item *item;
>  	struct qgroup_count *count;
>  	int i, nr;
> +	int search_relations = 0;
>  
> +loop:
> +	/*
> +	 * Do 2 passes, the first allocates group counts and reads status
> +	 * items. The 2nd pass picks up relation items and glues them
> +	 * to their respective count structures.
> +	 */
>  	btrfs_init_path(&path);
>  
>  	key.offset = 0;
> -	key.objectid = 0;
> +	key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
>  	key.type = 0;
>  
>  	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
> @@ -749,17 +907,27 @@ static int load_quota_info(struct btrfs_fs_info *info)
>  			btrfs_item_key(leaf, &disk_key, i);
>  			btrfs_disk_key_to_cpu(&key, &disk_key);
>  
> +			if (search_relations) {
> +				if (key.type == BTRFS_QGROUP_RELATION_KEY) {
> +					ret = add_qgroup_relation(key.objectid,
> +								  key.offset);
> +					if (ret) {
> +						fprintf(stderr,
> +						"ERROR: out of memory\n");

Please use the error("...") helper.

> +						goto out;
> +					}
> +				}
> +				continue;
> +			}
> +
>  			if (key.type == BTRFS_QGROUP_STATUS_KEY) {
>  				read_qgroup_status(&path, &counts);
>  				continue;
>  			}
> -			if (key.type == BTRFS_QGROUP_RELATION_KEY)
> -				printf("Ignoring qgroup relation key %llu\n",
> -				       key.objectid);
>  
>  			/*
> -			 * Ignore: BTRFS_QGROUP_LIMIT_KEY,
> -			 * 	   BTRFS_QGROUP_RELATION_KEY
> +			 * At this point, we can ignore anything that
> +			 * isn't a qgroup info.
>  			 */
>  			if (key.type != BTRFS_QGROUP_INFO_KEY)
>  				continue;
> @@ -791,6 +959,12 @@ static int load_quota_info(struct btrfs_fs_info *info)
>  
>  	ret = 0;
>  	btrfs_release_path(&path);
> +
> +	if (!search_relations) {
> +		search_relations = 1;
> +		goto loop;
> +	}
> +
>  out:
>  	return ret;
>  }
> @@ -1035,6 +1209,11 @@ static void print_fields_signed(long long bytes,
>  	       prefix, type, bytes, type, bytes_compressed);
>  }
>  
> +static inline int qgroup_printable(struct qgroup_count *c)
> +{
> +	return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
> +}
> +
>  static int report_qgroup_difference(struct qgroup_count *count, int verbose)
>  {
>  	int is_different;
> @@ -1045,9 +1224,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose)
>  
>  	is_different = excl_diff || ref_diff;
>  
> -	if (verbose || (is_different && count->subvol_exists)) {
> -		printf("Counts for qgroup id: %llu %s\n",
> -		       (unsigned long long)count->qgroupid,
> +	if (verbose || (is_different && qgroup_printable(count))) {
> +		printf("Counts for qgroup id: %llu/%llu %s\n",
> +		       btrfs_qgroup_level(count->qgroupid),
> +		       btrfs_qgroup_subvid(count->qgroupid),
>  		       is_different ? "are different" : "");
>  
>  		print_fields(info->referenced, info->referenced_compressed,
> @@ -1099,10 +1279,27 @@ void free_qgroup_counts(void)
>  {
>  	struct rb_node *node;
>  	struct qgroup_count *c;
> +	struct btrfs_qgroup_list *glist, *tmpglist;
> +
>  	node = rb_first(&counts.root);
>  	while (node) {
>  		c = rb_entry(node, struct qgroup_count, rb_node);
> +
> +		list_for_each_entry_safe(glist, tmpglist, &c->groups,
> +					 next_group) {
> +			list_del(&glist->next_group);
> +			list_del(&glist->next_member);
> +			free(glist);
> +		}
> +		list_for_each_entry_safe(glist, tmpglist, &c->members,
> +					 next_group) {
> +			list_del(&glist->next_group);
> +			list_del(&glist->next_member);
> +			free(glist);
> +		}
> +
>  		node = rb_next(node);
> +
>  		rb_erase(&c->rb_node, &counts.root);
>  		free(c);
>  	}
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux