Re: [PATCH 03/16] sched: aggregate load contributed by task entities on parenting cfs_rq

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]


On Wed, 27 Jun 2012 19:24:14 -0700, Paul Turner wrote:
> For a given task t, we can compute its contribution to load as:
>   task_load(t) = runnable_avg(t) * weight(t)
>
> On a parenting cfs_rq we can then aggregate
>   runnable_load(cfs_rq) = \Sum task_load(t), for all runnable children t
>
> Maintain this bottom up, with task entities adding their contributed load to
> the parenting cfs_rq sum.  When a task entities load changes we add the same
                                         entity's ?

> delta to the maintained sum.
>
> Signed-off-by: Paul Turner <pjt@xxxxxxxxxx>
> Signed-off-by: Ben Segall <bsegall@xxxxxxxxxx>
> ---
>  include/linux/sched.h |    1 +
>  kernel/sched/debug.c  |    3 +++
>  kernel/sched/fair.c   |   51 +++++++++++++++++++++++++++++++++++++++++++++----
>  kernel/sched/sched.h  |   10 +++++++++-
>  4 files changed, 60 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 5bf5c79..0c54ce0 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1139,6 +1139,7 @@ struct load_weight {
>  struct sched_avg {
>  	u32 runnable_avg_sum, runnable_avg_period;
>  	u64 last_runnable_update;
> +	unsigned long load_avg_contrib;
>  };
>  
>  #ifdef CONFIG_SCHEDSTATS
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 5d4a7dd..aeb74e3 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -94,6 +94,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
>  #ifdef CONFIG_SMP
>  	P(se->avg.runnable_avg_sum);
>  	P(se->avg.runnable_avg_period);
> +	P(se->avg.load_avg_contrib);
>  #endif
>  #undef PN
>  #undef P
> @@ -227,6 +228,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
>  			cfs_rq->load_contribution);
>  	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
>  			atomic_read(&cfs_rq->tg->load_weight));
> +	SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
> +			cfs_rq->runnable_load_avg);
>  #endif
>  
>  	print_cfs_group_stats(m, cpu, cfs_rq->tg);
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 08bd3e0..8229766 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1085,20 +1085,63 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
>  	return decayed;
>  }
>  
> +/* Compute the current contribution to load_avg by se, return any delta */
> +static long __update_entity_load_avg_contrib(struct sched_entity *se)
> +{
> +	long old_contrib = se->avg.load_avg_contrib;
> +
> +	if (!entity_is_task(se))
> +		return 0;
> +
> +	se->avg.load_avg_contrib = div64_u64(se->avg.runnable_avg_sum *
> +					     se->load.weight,
> +					     se->avg.runnable_avg_period + 1);
> +
> +	return se->avg.load_avg_contrib - old_contrib;
> +}
> +
>  /* Update a sched_entity's runnable average */
>  static inline void update_entity_load_avg(struct sched_entity *se)
>  {
> -	__update_entity_runnable_avg(rq_of(cfs_rq_of(se))->clock_task, &se->avg,
> -				     se->on_rq);
> +	struct cfs_rq *cfs_rq = cfs_rq_of(se);
> +	long contrib_delta;
> +
> +	if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg,
> +					  se->on_rq))

Ok, now I see that the return value is used here.

Thanks,
Namhyung


> +		return;
> +
> +	contrib_delta = __update_entity_load_avg_contrib(se);
> +	if (se->on_rq)
> +		cfs_rq->runnable_load_avg += contrib_delta;
>  }
>  
>  static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
>  {
>  	__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
>  }
> +
> +/* Add the load generated by se into cfs_rq's child load-average */
> +static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
> +						  struct sched_entity *se)
> +{
> +	update_entity_load_avg(se);
> +	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
> +}
> +
> +/* Remove se's load from this cfs_rq child load-average */
> +static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
> +						  struct sched_entity *se)
> +{
> +	update_entity_load_avg(se);
> +	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
> +}
>  #else
>  static inline void update_entity_load_avg(struct sched_entity *se) {}
>  static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
> +static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
> +						  struct sched_entity *se) {}
> +static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
> +						  struct sched_entity *se) {}
>  #endif
>  
>  static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
> @@ -1227,7 +1270,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>  	 */
>  	update_curr(cfs_rq);
>  	update_cfs_load(cfs_rq, 0);
> -	update_entity_load_avg(se);
> +	enqueue_entity_load_avg(cfs_rq, se);
>  	account_entity_enqueue(cfs_rq, se);
>  	update_cfs_shares(cfs_rq);
>  
> @@ -1302,7 +1345,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>  	 * Update run-time statistics of the 'current'.
>  	 */
>  	update_curr(cfs_rq);
> -	update_entity_load_avg(se);
> +	dequeue_entity_load_avg(cfs_rq, se);
>  
>  	update_stats_dequeue(cfs_rq, se);
>  	if (flags & DEQUEUE_SLEEP) {
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index bfdb119..26cc36f 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -222,6 +222,15 @@ struct cfs_rq {
>  	unsigned int nr_spread_over;
>  #endif
>  
> +#ifdef CONFIG_SMP
> +	/*
> +	 * CFS Load tracking
> +	 * Under CFS, load is tracked on a per-entity basis and aggregated up.
> +	 * This allows for the description of both thread and group usage (in
> +	 * the FAIR_GROUP_SCHED case).
> +	 */
> +	u64 runnable_load_avg;
> +#endif
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>  	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
>  
> @@ -1204,4 +1213,3 @@ static inline void account_numa_dequeue(struct task_struct *p) { }
>  static inline void init_sched_numa(void) { }
>  
>  #endif /* CONFIG_NUMA */
> -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[Other Archives]     [Linux Kernel Newbies]     [Linux Driver Development]     [Linux Kbuild]     [Fedora Kernel]     [Linux Kernel Testers]     [Linux SH]     [Linux Omap]     [Linux Tape]     [Linux Input]     [Linux Kernel Janitors]     [Linux Kernel Packagers]     [Linux Doc]     [Linux Man Pages]     [Linux API]     [Linux Memory Management]     [Linux Modules]     [Linux Standards]     [Kernel Announce]     [Netdev]     [Git]     [Linux PCI]     Linux CAN Development     [Linux I2C]     [Linux RDMA]     [Linux NUMA]     [Netfilter]     [Netfilter Devel]     [SELinux]     [Bugtraq]     [FIO]     [Linux Perf Users]     [Linux Serial]     [Linux PPP]     [Linux ISDN]     [Linux Next]     [Kernel Stable Commits]     [Linux Tip Commits]     [Kernel MM Commits]     [Linux Security Module]     [AutoFS]     [Filesystem Development]     [Ext3 Filesystem]     [Linux bcache]     [Ext4 Filesystem]     [Linux BTRFS]     [Linux CEPH Filesystem]     [Linux XFS]     [XFS]     [Linux NFS]     [Linux CIFS]     [Ecryptfs]     [Linux NILFS]     [Linux Cachefs]     [Reiser FS]     [Initramfs]     [Linux FB Devel]     [Linux OpenGL]     [DRI Devel]     [Fastboot]     [Linux RT Users]     [Linux RT Stable]     [eCos]     [Corosync]     [Linux Clusters]     [LVS Devel]     [Hot Plug]     [Linux Virtualization]     [KVM]     [KVM PPC]     [KVM ia64]     [Linux Containers]     [Linux Hexagon]     [Linux Cgroups]     [Util Linux]     [Wireless]     [Linux Bluetooth]     [Bluez Devel]     [Ethernet Bridging]     [Embedded Linux]     [Barebox]     [Linux MMC]     [Linux IIO]     [Sparse]     [Smatch]     [Linux Arch]     [x86 Platform Driver]     [Linux ACPI]     [Linux IBM ACPI]     [LM Sensors]     [CPU Freq]     [Linux Power Management]     [Linmodems]     [Linux DCCP]     [Linux SCTP]     [ALSA Devel]     [Linux USB]     [Linux PA RISC]     [Linux Samsung SOC]     [MIPS Linux]     [IBM S/390 Linux]     [ARM Linux]     [ARM Kernel]     [ARM MSM]     [Tegra Devel]     [Sparc Linux]     [Linux Security]     [Linux Sound]     [Linux Media]     [Video 4 Linux]     [Linux IRDA Users]     [Linux for the blind]     [Linux RAID]     [Linux ATA RAID]     [Device Mapper]     [Linux SCSI]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Linux IDE]     [Linux SMP]     [Linux AXP]     [Linux Alpha]     [Linux M68K]     [Linux ia64]     [Linux 8086]     [Linux x86_64]     [Linux Config]     [Linux Apps]     [Linux MSDOS]     [Linux X.25]     [Linux Crypto]     [DM Crypt]     [Linux Trace Users]     [Linux Btrace]     [Linux Watchdog]     [Utrace Devel]     [Linux C Programming]     [Linux Assembly]     [Dash]     [DWARVES]     [Hail Devel]     [Linux Kernel Debugger]     [Linux gcc]     [Gcc Help]     [X.Org]     [Wine]

Add to Google Powered by Linux

[Older Kernel Discussion]     [Yosemite National Park Forum]     [Large Format Photos]     [Gimp]     [Yosemite Photos]     [Stuff]