mempolicy: disallow static or relative flags for local preferred mode
MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES don't mean anything for
MPOL_PREFERRED policies that were created with an empty nodemask (for purely
local allocations). Such policies are never invalidated when the allowed mems
of a task change, and they never need to be rebound relative to a cpuset's
placement.
Also fixes a bug identified by Lee Schermerhorn that prevented empty
nodemasks from being passed to MPOL_PREFERRED to specify local allocations.
[A different, somewhat incomplete, patch already existed in 25-rc5-mm1.]
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index 706410d..1c7dd21 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -205,6 +205,12 @@
local allocation for a specific range of addresses--i.e. for
VMA policies.
+ It is possible for the user to specify that local allocation is
+ always preferred by passing an empty nodemask with this mode.
+ If an empty nodemask is passed, the policy cannot use the
+ MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
+ below.
+
MPOL_INTERLEAVED: This mode specifies that page allocations be
interleaved, on a page granularity, across the nodes specified in
the policy. This mode also behaves slightly differently, based on
@@ -254,7 +260,10 @@
occurs over that node. If no nodes from the user's nodemask are
now allowed, the Default behavior is used.
- MPOL_F_STATIC_NODES cannot be used with MPOL_F_RELATIVE_NODES.
+ MPOL_F_STATIC_NODES cannot be combined with the
+ MPOL_F_RELATIVE_NODES flag. It also cannot be used for
+ MPOL_PREFERRED policies that were created with an empty nodemask
+ (local allocation).
MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed
by the user will be mapped relative to the set of the task or VMA's
@@ -301,7 +310,10 @@
set of memory nodes allowed by the task's cpuset, as that may
change over time.
- MPOL_F_RELATIVE_NODES cannot be used with MPOL_F_STATIC_NODES.
+ MPOL_F_RELATIVE_NODES cannot be combined with the
+ MPOL_F_STATIC_NODES flag. It also cannot be used for
+ MPOL_PREFERRED policies that were created with an empty nodemask
+ (local allocation).
MEMORY POLICY APIs
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a94d994..c1b9077 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -181,27 +181,43 @@
{
struct mempolicy *policy;
nodemask_t cpuset_context_nmask;
- int localalloc = 0;
int ret;
pr_debug("setting mode %d flags %d nodes[0] %lx\n",
mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
- if (mode == MPOL_DEFAULT)
- return NULL;
- if (!nodes || nodes_empty(*nodes)) {
- if (mode != MPOL_PREFERRED)
+ if (mode == MPOL_DEFAULT) {
+ if (nodes && !nodes_empty(*nodes))
return ERR_PTR(-EINVAL);
- localalloc = 1; /* special case: no mode flags */
+ return NULL;
}
+ VM_BUG_ON(!nodes);
+
+ /*
+ * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or
+ * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation).
+ * All other modes require a valid pointer to a non-empty nodemask.
+ */
+ if (mode == MPOL_PREFERRED) {
+ if (nodes_empty(*nodes)) {
+ if (((flags & MPOL_F_STATIC_NODES) ||
+ (flags & MPOL_F_RELATIVE_NODES)))
+ return ERR_PTR(-EINVAL);
+ nodes = NULL; /* flag local alloc */
+ }
+ } else if (nodes_empty(*nodes))
+ return ERR_PTR(-EINVAL);
policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
if (!policy)
return ERR_PTR(-ENOMEM);
atomic_set(&policy->refcnt, 1);
policy->policy = mode;
+ policy->flags = flags;
- if (!localalloc) {
- policy->flags = flags;
+ if (nodes) {
+ /*
+ * cpuset related setup doesn't apply to local allocation
+ */
cpuset_update_task_memory_state();
if (flags & MPOL_F_RELATIVE_NODES)
mpol_relative_nodemask(&cpuset_context_nmask, nodes,
@@ -217,7 +233,7 @@
}
ret = mpol_ops[mode].create(policy,
- localalloc ? NULL : &cpuset_context_nmask);
+ nodes ? &cpuset_context_nmask : NULL);
if (ret < 0) {
kmem_cache_free(policy_cache, policy);
return ERR_PTR(ret);
@@ -259,10 +275,6 @@
{
nodemask_t tmp;
- /*
- * check 'STATIC_NODES first, as preferred_node == -1 may be
- * a temporary, "fallback" state for this policy.
- */
if (pol->flags & MPOL_F_STATIC_NODES) {
int node = first_node(pol->w.user_nodemask);
@@ -270,12 +282,10 @@
pol->v.preferred_node = node;
else
pol->v.preferred_node = -1;
- } else if (pol->v.preferred_node == -1) {
- return; /* no remap required for explicit local alloc */
} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
pol->v.preferred_node = first_node(tmp);
- } else {
+ } else if (pol->v.preferred_node != -1) {
pol->v.preferred_node = node_remap(pol->v.preferred_node,
pol->w.cpuset_mems_allowed,
*nodes);