path: root/mm/mempolicy.c
diff options
author: Miao Xie <miaox@cn.fujitsu.com> 2010-05-24 14:32:08 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-25 08:06:57 -0700
commit: c0ff7453bb5c7c98e0885fb94279f2571946f280 (patch)
tree: 8bb2b169a5145f0496575dbd2f48bb4b1c83f819 /mm/mempolicy.c
parent: 708c1bbc9d0c3e57f40501794d9b0eed29d10fce (diff)
cpuset,mm: fix no node to alloc memory when changing cpuset's mems
Before applying this patch, cpuset updates task->mems_allowed and the mempolicy by setting all new bits in the nodemask first, and clearing all old disallowed bits later. But done this way, the allocator may find that there is no node to allocate memory from.

The reason is that when cpuset rebinds the task's mempolicy, it clears the nodes which the allocator can allocate pages on, for example:

(mpol: mempolicy)

    task1                     task1's mpol    task2
    alloc page                1
      alloc on node0? NO      1
                              1               change mems from 1 to 0
                              1               rebind task1's mpol
                              0-1               set new bits
                              0                 clear disallowed bits
      alloc on node1? NO      0
      ...
    can't alloc page
      goto oom

This patch fixes this problem by expanding the nodes range first (setting newly allowed bits) and shrinking it lazily (clearing newly disallowed bits). So we use a variable to tell the write-side task that the read-side task is reading the nodemask, and the write-side task clears the newly disallowed nodes after the read-side task ends the current memory allocation.

[akpm@linux-foundation.org: fix spello]
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Paul Menage <menage@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ravikiran Thirumalai <kiran@scalex86.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mempolicy.c')
1 files changed, 20 insertions, 4 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8a993db8802..721b2b33803 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1639,6 +1639,8 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
* to the struct mempolicy for conditional unref after allocation.
* If the effective policy is 'BIND, returns a pointer to the mempolicy's
* @nodemask for filtering the zonelist.
+ *
+ * Must be protected by get_mems_allowed()
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
gfp_t gfp_flags, struct mempolicy **mpol,
@@ -1684,6 +1686,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
if (!(mask && current->mempolicy))
return false;
+ task_lock(current);
mempolicy = current->mempolicy;
switch (mempolicy->mode) {
@@ -1703,6 +1706,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
+ task_unlock(current);
return true;
@@ -1750,13 +1754,17 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
struct mempolicy *pol = get_vma_policy(current, vma, addr);
struct zonelist *zl;
+ struct page *page;
+ get_mems_allowed();
if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
unsigned nid;
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
- return alloc_page_interleave(gfp, 0, nid);
+ page = alloc_page_interleave(gfp, 0, nid);
+ put_mems_allowed();
+ return page;
zl = policy_zonelist(gfp, pol);
if (unlikely(mpol_needs_cond_ref(pol))) {
@@ -1766,12 +1774,15 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
struct page *page = __alloc_pages_nodemask(gfp, 0,
zl, policy_nodemask(gfp, pol));
+ put_mems_allowed();
return page;
* fast path: default or task policy
- return __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+ page = __alloc_pages_nodemask(gfp, 0, zl, policy_nodemask(gfp, pol));
+ put_mems_allowed();
+ return page;
@@ -1796,18 +1807,23 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
struct page *alloc_pages_current(gfp_t gfp, unsigned order)
struct mempolicy *pol = current->mempolicy;
+ struct page *page;
if (!pol || in_interrupt() || (gfp & __GFP_THISNODE))
pol = &default_policy;
+ get_mems_allowed();
* No reference counting needed for current->mempolicy
* nor system default_policy
if (pol->mode == MPOL_INTERLEAVE)
- return alloc_page_interleave(gfp, order, interleave_nodes(pol));
- return __alloc_pages_nodemask(gfp, order,
+ page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
+ else
+ page = __alloc_pages_nodemask(gfp, order,
policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
+ put_mems_allowed();
+ return page;