PATCH 11/15 Mempolicy: use MPOL_F_LOCAL to indicate preferred local policy
Against: 2.6.25-rc8-mm1
Now that we're using "preferred local" policy for system default,
we need to make this as fast as possible. Because of the variable
size of the mempolicy structure [based on size of nodemasks], the
preferred_node may be in a different cacheline from the mode. This
can result in accessing an extra cacheline in the normal case of
system default policy. I suspect this is the cause of an observed
2-3% slowdown in page fault testing relative to a kernel without this
patch series.
To alleviate this, use an internal mode flag, MPOL_F_LOCAL, in the
mempolicy flags member, which is guaranteed [?] to be in the same
cacheline as the mode itself.
Verified that the reworked mempolicy now performs slightly better on
2.6.25-rc8-mm1 for both anon and shmem segments with system default and
vma [preferred local] policy.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx>
Documentation/vm/numa_memory_policy.txt | 11 +++----
include/linux/mempolicy.h | 1
mm/mempolicy.c | 45 ++++++++++++++------------------
3 files changed, 27 insertions(+), 30 deletions(-)
Index: linux-2.6.25-rc8-mm1/include/linux/mempolicy.h
===================================================================
--- linux-2.6.25-rc8-mm1.orig/include/linux/mempolicy.h 2008-04-02 17:47:26.000000000 -0400
+++ linux-2.6.25-rc8-mm1/include/linux/mempolicy.h 2008-04-02 17:48:32.000000000 -0400
@@ -50,6 +50,7 @@ enum {
* are never OR'ed into the mode in mempolicy API arguments.
*/
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
+#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
#ifdef __KERNEL__
Index: linux-2.6.25-rc8-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.25-rc8-mm1.orig/mm/mempolicy.c 2008-04-02 17:47:41.000000000 -0400
+++ linux-2.6.25-rc8-mm1/mm/mempolicy.c 2008-04-02 17:51:58.000000000 -0400
@@ -110,7 +110,7 @@ enum zone_type policy_zone = 0;
struct mempolicy default_policy = {
.refcnt = ATOMIC_INIT(1), /* never free it */
.mode = MPOL_PREFERRED,
- .v = { .preferred_node = -1 },
+ .flags = MPOL_F_LOCAL,
};
static const struct mempolicy_operations {
@@ -163,7 +163,7 @@ static int mpol_new_interleave(struct me
static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
{
if (!nodes)
- pol->v.preferred_node = -1; /* local allocation */
+ pol->flags |= MPOL_F_LOCAL; /* local allocation */
else if (nodes_empty(*nodes))
return -EINVAL; /* no allowed nodes */
else
@@ -290,14 +290,15 @@ static void mpol_rebind_preferred(struct
if (pol->flags & MPOL_F_STATIC_NODES) {
int node = first_node(pol->w.user_nodemask);
- if (node_isset(node, *nodes))
+ if (node_isset(node, *nodes)) {
pol->v.preferred_node = node;
- else
- pol->v.preferred_node = -1;
+ pol->flags &= ~MPOL_F_LOCAL;
+ } else
+ pol->flags |= MPOL_F_LOCAL;
} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
pol->v.preferred_node = first_node(tmp);
- } else if (pol->v.preferred_node != -1) {
+ } else if (!(pol->flags & MPOL_F_LOCAL)) {
pol->v.preferred_node = node_remap(pol->v.preferred_node,
pol->w.cpuset_mems_allowed,
*nodes);
@@ -645,7 +646,7 @@ static void get_policy_nodemask(struct m
*nodes = p->v.nodes;
break;
case MPOL_PREFERRED:
- if (p->v.preferred_node >= 0)
+ if (!(p->flags & MPOL_F_LOCAL))
node_set(p->v.preferred_node, *nodes);
/* else return empty node mask for local allocation */
break;
@@ -1324,13 +1325,12 @@ static nodemask_t *policy_nodemask(gfp_t
/* Return a zonelist indicated by gfp for node representing a mempolicy */
static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
{
- int nd;
+ int nd = numa_node_id();
switch (policy->mode) {
case MPOL_PREFERRED:
- nd = policy->v.preferred_node;
- if (nd < 0)
- nd = numa_node_id();
+ if (!(policy->flags & MPOL_F_LOCAL))
+ nd = policy->v.preferred_node;
break;
case MPOL_BIND:
/*
@@ -1339,16 +1339,13 @@ static struct zonelist *policy_zonelist(
* current node is part of the mask, we use the zonelist for
* the first node in the mask instead.
*/
- nd = numa_node_id();
if (unlikely(gfp & __GFP_THISNODE) &&
unlikely(!node_isset(nd, policy->v.nodes)))
nd = first_node(policy->v.nodes);
break;
case MPOL_INTERLEAVE: /* should not happen */
- nd = numa_node_id();
break;
default:
- nd = 0;
BUG();
}
return node_zonelist(nd, gfp);
@@ -1379,14 +1376,15 @@ static unsigned interleave_nodes(struct
*/
unsigned slab_node(struct mempolicy *policy)
{
- if (!policy)
+ if (!policy || policy->flags & MPOL_F_LOCAL)
return numa_node_id();
switch (policy->mode) {
case MPOL_PREFERRED:
- if (unlikely(policy->v.preferred_node >= 0))
- return policy->v.preferred_node;
- return numa_node_id();
+ /*
+ * handled MPOL_F_LOCAL above
+ */
+ return policy->v.preferred_node;
case MPOL_INTERLEAVE:
return interleave_nodes(policy);
@@ -1667,7 +1665,8 @@ int __mpol_equal(struct mempolicy *a, st
case MPOL_INTERLEAVE:
return nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
- return a->v.preferred_node == b->v.preferred_node;
+ return a->v.preferred_node == b->v.preferred_node &&
+ a->flags == b->flags;
default:
BUG();
return 0;
@@ -1947,7 +1946,7 @@ void numa_default_policy(void)
}
/*
- * "local" is pseudo-policy: MPOL_PREFERRED with preferred_node == -1
+ * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
* Used only for mpol_to_str()
*/
#define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
@@ -1963,7 +1962,6 @@ static inline int mpol_to_str(char *buff
{
char *p = buffer;
int l;
- int nid;
nodemask_t nodes;
unsigned short mode;
unsigned short flags = pol ? pol->flags : 0;
@@ -1980,11 +1978,10 @@ static inline int mpol_to_str(char *buff
case MPOL_PREFERRED:
nodes_clear(nodes);
- nid = pol->v.preferred_node;
- if (nid < 0)
+ if (flags & MPOL_F_LOCAL)
mode = MPOL_LOCAL; /* pseudo-policy */
else
- node_set(nid, nodes);
+ node_set(pol->v.preferred_node, nodes);
break;
case MPOL_BIND:
Index: linux-2.6.25-rc8-mm1/Documentation/vm/numa_memory_policy.txt
===================================================================
--- linux-2.6.25-rc8-mm1.orig/Documentation/vm/numa_memory_policy.txt 2008-04-02 17:47:37.000000000 -0400
+++ linux-2.6.25-rc8-mm1/Documentation/vm/numa_memory_policy.txt 2008-04-02 17:47:48.000000000 -0400
@@ -176,12 +176,11 @@ Components of Memory Policies
containing the cpu where the allocation takes place.
Internally, the Preferred policy uses a single node--the
- preferred_node member of struct mempolicy. A "distinguished
- value of this preferred_node, currently '-1', is interpreted
- as "the node containing the cpu where the allocation takes
- place"--local allocation. "Local" allocation policy can be
- viewed as a Preferred policy that starts at the node containing
- the cpu where the allocation takes place.
+ preferred_node member of struct mempolicy. When the internal
+ mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
+ the policy is interpreted as local allocation. "Local" allocation
+ policy can be viewed as a Preferred policy that starts at the node
+ containing the cpu where the allocation takes place.
It is possible for the user to specify that local allocation is
always preferred by passing an empty nodemask with this mode.
--
To unsubscribe from this list: send the line "unsubscribe linux-numa" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
[Home]
[Linux USB Devel]
[Video for Linux]
[Linux Audio Users]
[Photo]
[Yosemite News]
[Yosemite Photos]
[Free Online Dating]
[Linux Kernel]
[Linux SCSI]
[XFree86]
[Devices]