Re: [PATCH 7/9] block: implement bio_associate_current()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]


On Thu, Feb 16, 2012 at 02:37:56PM -0800, Tejun Heo wrote:
> This patch implements bio_associate_current() which associates the
> specified bio with %current.  The bio will record the associated ioc
> and blkcg at that point and block layer will use the recorded ones
> regardless of which task actually ends up issuing the bio.  bio
> release puts the associated ioc and blkcg.

Excellent.

Why not have bio_associate_current() called from submit_bio()? I would
expect that's what we want most of the time, and in the places where it
isn't (mainly writeback), calling it before submit_bio() would do the
right thing.

It'd make things more consistent - rq_ioc() could be dropped, and
incorrect usage would be more obvious.

> It grabs and remembers ioc and blkcg instead of the task itself
> because task may already be dead by the time the bio is issued making
> ioc and blkcg inaccessible and those are all block layer cares about.
> 
> elevator_set_req_fn() is updated such that the bio for which elvdata is
> being allocated is available to the elevator.
> 
> This doesn't update block cgroup policies yet.  Further patches will
> implement the support.
> 
> Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
> Cc: Kent Overstreet <koverstreet@xxxxxxxxxx>
> ---
>  block/blk-core.c          |   30 +++++++++++++++++-----
>  block/cfq-iosched.c       |    3 +-
>  block/elevator.c          |    5 ++-
>  fs/bio.c                  |   61 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/bio.h       |    8 ++++++
>  include/linux/blk_types.h |   10 +++++++
>  include/linux/elevator.h  |    6 +++-
>  7 files changed, 111 insertions(+), 12 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 195c5f7..e6a4f90 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -695,7 +695,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
>  }
>  
>  static struct request *
> -blk_alloc_request(struct request_queue *q, struct io_cq *icq,
> +blk_alloc_request(struct request_queue *q, struct bio *bio, struct io_cq *icq,
>  		  unsigned int flags, gfp_t gfp_mask)
>  {
>  	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
> @@ -709,7 +709,7 @@ blk_alloc_request(struct request_queue *q, struct io_cq *icq,
>  
>  	if (flags & REQ_ELVPRIV) {
>  		rq->elv.icq = icq;
> -		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
> +		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
>  			mempool_free(rq, q->rq.rq_pool);
>  			return NULL;
>  		}
> @@ -809,6 +809,20 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
>  }
>  
>  /**
> + * rq_ioc - determine io_context for request allocation
> + * @bio: request being allocated is for this bio (can be %NULL)
> + *
> + * Determine io_context to use for request allocation for @bio.  May return
> + * %NULL if %current->io_context doesn't exist.
> + */
> +static struct io_context *rq_ioc(struct bio *bio)
> +{
> +	if (bio && bio->bi_ioc)
> +		return bio->bi_ioc;
> +	return current->io_context;
> +}
> +
> +/**
>   * get_request - get a free request
>   * @q: request_queue to allocate request from
>   * @rw_flags: RW and SYNC flags
> @@ -835,7 +849,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
>  	int may_queue;
>  retry:
>  	et = q->elevator->type;
> -	ioc = current->io_context;
> +	ioc = rq_ioc(bio);
>  
>  	if (unlikely(blk_queue_dead(q)))
>  		return NULL;
> @@ -918,14 +932,16 @@ retry:
>  
>  	/* create icq if missing */
>  	if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
> -		ioc = create_io_context(gfp_mask, q->node);
> -		if (ioc)
> -			icq = ioc_create_icq(ioc, q, gfp_mask);
> +		create_io_context(gfp_mask, q->node);
> +		ioc = rq_ioc(bio);
> +		if (!ioc)
> +			goto fail_alloc;
> +		icq = ioc_create_icq(ioc, q, gfp_mask);
>  		if (!icq)
>  			goto fail_alloc;
>  	}
>  
> -	rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
> +	rq = blk_alloc_request(q, bio, icq, rw_flags, gfp_mask);
>  	if (unlikely(!rq))
>  		goto fail_alloc;
>  
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 00e28a3..b2aabe8 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -3299,7 +3299,8 @@ split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
>   * Allocate cfq data structures associated with this request.
>   */
>  static int
> -cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
> +		gfp_t gfp_mask)
>  {
>  	struct cfq_data *cfqd = q->elevator->elevator_data;
>  	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
> diff --git a/block/elevator.c b/block/elevator.c
> index 06d9869..6315a27 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -663,12 +663,13 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
>  	return NULL;
>  }
>  
> -int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> +int elv_set_request(struct request_queue *q, struct request *rq,
> +		    struct bio *bio, gfp_t gfp_mask)
>  {
>  	struct elevator_queue *e = q->elevator;
>  
>  	if (e->type->ops.elevator_set_req_fn)
> -		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
> +		return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask);
>  	return 0;
>  }
>  
> diff --git a/fs/bio.c b/fs/bio.c
> index b980ecd..142214b 100644
> --- a/fs/bio.c
> +++ b/fs/bio.c
> @@ -19,12 +19,14 @@
>  #include <linux/swap.h>
>  #include <linux/bio.h>
>  #include <linux/blkdev.h>
> +#include <linux/iocontext.h>
>  #include <linux/slab.h>
>  #include <linux/init.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/mempool.h>
>  #include <linux/workqueue.h>
> +#include <linux/cgroup.h>
>  #include <scsi/sg.h>		/* for struct sg_iovec */
>  
>  #include <trace/events/block.h>
> @@ -418,6 +420,7 @@ void bio_put(struct bio *bio)
>  	 * last put frees it
>  	 */
>  	if (atomic_dec_and_test(&bio->bi_cnt)) {
> +		bio_disassociate_task(bio);
>  		bio->bi_next = NULL;
>  		bio->bi_destructor(bio);
>  	}
> @@ -1641,6 +1644,64 @@ bad:
>  }
>  EXPORT_SYMBOL(bioset_create);
>  
> +#ifdef CONFIG_BLK_CGROUP
> +/**
> + * bio_associate_current - associate a bio with %current
> + * @bio: target bio
> + *
> + * Associate @bio with %current if it hasn't been associated yet.  Block
> + * layer will treat @bio as if it were issued by %current no matter which
> + * task actually issues it.
> + *
> + * This function takes an extra reference of %current's io_context and blkcg
> + * which will be put when @bio is released.  The caller must own @bio,
> + * ensure %current->io_context exists, and is responsible for synchronizing
> + * calls to this function.
> + */
> +int bio_associate_current(struct bio *bio)
> +{
> +	struct io_context *ioc;
> +	struct cgroup_subsys_state *css;
> +
> +	if (bio->bi_ioc)
> +		return -EBUSY;
> +
> +	ioc = current->io_context;
> +	if (!ioc)
> +		return -ENOENT;
> +
> +	/* acquire active ref on @ioc and associate */
> +	get_io_context_active(ioc);
> +	bio->bi_ioc = ioc;
> +
> +	/* associate blkcg if exists */
> +	rcu_read_lock();
> +	css = task_subsys_state(current, blkio_subsys_id);
> +	if (css && css_tryget(css))
> +		bio->bi_css = css;
> +	rcu_read_unlock();
> +
> +	return 0;
> +}
> +
> +/**
> + * bio_disassociate_task - undo bio_associate_current()
> + * @bio: target bio
> + */
> +void bio_disassociate_task(struct bio *bio)
> +{
> +	if (bio->bi_ioc) {
> +		put_io_context(bio->bi_ioc);
> +		bio->bi_ioc = NULL;
> +	}
> +	if (bio->bi_css) {
> +		css_put(bio->bi_css);
> +		bio->bi_css = NULL;
> +	}
> +}
> +
> +#endif /* CONFIG_BLK_CGROUP */
> +
>  static void __init biovec_init_slabs(void)
>  {
>  	int i;
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 129a9c0..692d3d5 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -268,6 +268,14 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set
>  extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
>  extern unsigned int bvec_nr_vecs(unsigned short idx);
>  
> +#ifdef CONFIG_BLK_CGROUP
> +int bio_associate_current(struct bio *bio);
> +void bio_disassociate_task(struct bio *bio);
> +#else	/* CONFIG_BLK_CGROUP */
> +static inline int bio_associate_current(struct bio *bio) { return -ENOENT; }
> +static inline void bio_disassociate_task(struct bio *bio) { }
> +#endif	/* CONFIG_BLK_CGROUP */
> +
>  /*
>   * bio_set is used to allow other portions of the IO system to
>   * allocate their own private memory pools for bio and iovec structures.
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 4053cbd..0edb65d 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -14,6 +14,8 @@ struct bio;
>  struct bio_integrity_payload;
>  struct page;
>  struct block_device;
> +struct io_context;
> +struct cgroup_subsys_state;
>  typedef void (bio_end_io_t) (struct bio *, int);
>  typedef void (bio_destructor_t) (struct bio *);
>  
> @@ -66,6 +68,14 @@ struct bio {
>  	bio_end_io_t		*bi_end_io;
>  
>  	void			*bi_private;
> +#ifdef CONFIG_BLK_CGROUP
> +	/*
> +	 * Optional ioc and css associated with this bio.  Put on bio
> +	 * release.  Read comment on top of bio_associate_current().
> +	 */
> +	struct io_context	*bi_ioc;
> +	struct cgroup_subsys_state *bi_css;
> +#endif
>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>  	struct bio_integrity_payload *bi_integrity;  /* data integrity */
>  #endif
> diff --git a/include/linux/elevator.h b/include/linux/elevator.h
> index 97fb255..c03af76 100644
> --- a/include/linux/elevator.h
> +++ b/include/linux/elevator.h
> @@ -28,7 +28,8 @@ typedef int (elevator_may_queue_fn) (struct request_queue *, int);
>  
>  typedef void (elevator_init_icq_fn) (struct io_cq *);
>  typedef void (elevator_exit_icq_fn) (struct io_cq *);
> -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
> +typedef int (elevator_set_req_fn) (struct request_queue *, struct request *,
> +				   struct bio *, gfp_t);
>  typedef void (elevator_put_req_fn) (struct request *);
>  typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
>  typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
> @@ -129,7 +130,8 @@ extern void elv_unregister_queue(struct request_queue *q);
>  extern int elv_may_queue(struct request_queue *, int);
>  extern void elv_abort_queue(struct request_queue *);
>  extern void elv_completed_request(struct request_queue *, struct request *);
> -extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
> +extern int elv_set_request(struct request_queue *q, struct request *rq,
> +			   struct bio *bio, gfp_t gfp_mask);
>  extern void elv_put_request(struct request_queue *, struct request *);
>  extern void elv_drain_elevator(struct request_queue *);
>  
> -- 
> 1.7.7.3
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Other Archives]     [Linux Kernel Newbies]     [Linux Driver Development]     [Fedora Kernel]     [Linux Kernel Testers]     [Linux SH]     [Linux Omap]     [Linux Kbuild]     [Linux Tape]     [Linux Input]     [Linux Kernel Janitors]     [Linux Kernel Packagers]     [Linux Doc]     [Linux Man Pages]     [Linux API]     [Linux Memory Management]     [Linux Modules]     [Linux Standards]     [Kernel Announce]     [Netdev]     [Git]     [Linux PCI]     Linux CAN Development     [Linux I2C]     [Linux RDMA]     [Linux NUMA]     [Netfilter]     [Netfilter Devel]     [SELinux]     [Bugtraq]     [FIO]     [Linux Perf Users]     [Linux Serial]     [Linux PPP]     [Linux ISDN]     [Linux Next]     [Kernel Stable Commits]     [Linux Tip Commits]     [Kernel MM Commits]     [Linux Security Module]     [Filesystem Development]     [Ext3 Filesystem]     [Linux bcache]     [Ext4 Filesystem]     [Linux BTRFS]     [Linux CEPH Filesystem]     [Linux XFS]     [XFS]     [Linux NFS]     [Linux CIFS]     [Ecryptfs]     [Linux NILFS]     [Linux Cachefs]     [Reiser FS]     [Initramfs]     [Linux FB Devel]     [Linux OpenGL]     [DRI Devel]     [Fastboot]     [Linux RT Users]     [Linux RT Stable]     [eCos]     [Corosync]     [Linux Clusters]     [LVS Devel]     [Hot Plug]     [Linux Virtualization]     [KVM]     [KVM PPC]     [KVM ia64]     [Linux Containers]     [Linux Hexagon]     [Linux Cgroups]     [Util Linux]     [Wireless]     [Linux Bluetooth]     [Bluez Devel]     [Ethernet Bridging]     [Embedded Linux]     [Barebox]     [Linux MMC]     [Linux IIO]     [Sparse]     [Smatch]     [Linux Arch]     [x86 Platform Driver]     [Linux ACPI]     [Linux IBM ACPI]     [LM Sensors]     [CPU Freq]     [Linux Power Management]     [Linmodems]     [Linux DCCP]     [Linux SCTP]     [ALSA Devel]     [Linux USB]     [Linux PA RISC]     [Linux Samsung SOC]     [MIPS Linux]     [IBM S/390 Linux]     [ARM Linux]     [ARM Kernel]     [ARM MSM]     [Tegra Devel]     [Sparc Linux]     [Linux 
Security]     [Linux Sound]     [Linux Media]     [Video 4 Linux]     [Linux IRDA Users]     [Linux for the blind]     [Linux RAID]     [Linux ATA RAID]     [Device Mapper]     [Linux SCSI]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Linux IDE]     [Linux SMP]     [Linux AXP]     [Linux Alpha]     [Linux M68K]     [Linux ia64]     [Linux 8086]     [Linux x86_64]     [Linux Config]     [Linux Apps]     [Linux MSDOS]     [Linux X.25]     [Linux Crypto]     [DM Crypt]     [Linux Trace Users]     [Linux Btrace]     [Linux Watchdog]     [Utrace Devel]     [Linux C Programming]     [Linux Assembly]     [Dash]     [DWARVES]     [Hail Devel]     [Linux Kernel Debugger]     [Linux gcc]     [Gcc Help]     [X.Org]     [Wine]

Add to Google Powered by Linux

[Older Kernel Discussion]     [Yosemite National Park Forum]     [Large Format Photos]     [Gimp]     [Yosemite Photos]     [Stuff]