aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric B Munson <emunson@mgebm.net>2011-12-07 10:38:59 -0500
committerEric B Munson <emunson@mgebm.net>2011-12-15 14:50:15 -0500
commit9876ed942f134fa1df32b2bba235f58d3a1ea781 (patch)
treec890c4815d7a4486ab406178bdf20b52454cc532
parent87485135bae76e5961afa432aa3b954c712d7b52 (diff)
downloadlibhugetlbfs-9876ed942f134fa1df32b2bba235f58d3a1ea781.tar.gz
Add support for THP in morecore
Transparent huge pages (THP) give another option for access to huge pages for anonymous mappings. This patch adds the ability to specify that the process heap should be aligned and, if requested, madvised so that it can be merged into huge pages by khugepaged. For more information on THP see linux-2.6/Documentation/transhuge.txt. Signed-off-by: Eric B Munson <emunson@mgebm.net> Cc: abh@cray.com Cc: david@gibson.dropbear.id.au
-rw-r--r--HOWTO11
-rw-r--r--hugeutils.c9
-rw-r--r--libhugetlbfs_internal.h1
-rw-r--r--morecore.c92
4 files changed, 105 insertions, 8 deletions
diff --git a/HOWTO b/HOWTO
index 3c1f124..8db958d 100644
--- a/HOWTO
+++ b/HOWTO
@@ -2,7 +2,7 @@ libhugetlbfs HOWTO
==================
Author: David Gibson <dwg@au1.ibm.com>, Adam Litke <agl@us.ibm.com>, and others
-Last updated: February 1st, 2011
+Last updated: December 07, 2011
Introduction
============
@@ -264,6 +264,11 @@ environment variables:
To use a specific huge page size:
HUGETLB_MORECORE=<pagesize>
+ To use Transparent Huge Pages (THP):
+ HUGETLB_MORECORE=thp
+
+Note: This option requires a kernel that supports Transparent Huge Pages.
+
Usually it's preferable to set these environment variables on the
command line of the program you wish to run, rather than using
"export", because you'll only want to enable the hugepage malloc() for
@@ -284,9 +289,9 @@ it yet, the following would work for a 64-bit program:
Under some circumstances, you might want to specify the address where
the hugepage heap is located. You can do this by setting the
HUGETLB_MORECORE_HEAPBASE environment variable to the heap address in
-hexadecimal. (NOTE: this will not work on PowerPC systems with old kernels
+hexadecimal. NOTE: this will not work on PowerPC systems with old kernels
which don't respect the hugepage hint address; see Kernel Prerequisites
-above).
+above. Also note that this option is ignored for THP morecore.
By default, the hugepage heap begins at roughly the same place a
normal page heap would, rounded up by an amount determined by your
diff --git a/hugeutils.c b/hugeutils.c
index b28bd0a..a7ccda0 100644
--- a/hugeutils.c
+++ b/hugeutils.c
@@ -335,6 +335,15 @@ void hugetlbfs_setup_env()
__hugetlb_opts.morecore = getenv("HUGETLB_MORECORE");
__hugetlb_opts.heapbase = getenv("HUGETLB_MORECORE_HEAPBASE");
+ if (__hugetlb_opts.morecore)
+ __hugetlb_opts.thp_morecore =
+ (strcasecmp(__hugetlb_opts.morecore, "thp") == 0);
+
+ if (__hugetlb_opts.thp_morecore && __hugetlb_opts.heapbase) {
+ DEBUG("Heapbase specified with THP for morecore, ignoring heapbase\n");
+ __hugetlb_opts.heapbase = NULL;
+ }
+
env = getenv("HUGETLB_FORCE_ELFMAP");
if (env && (strcasecmp(env, "yes") == 0))
__hugetlb_opts.force_elfmap = 1;
diff --git a/libhugetlbfs_internal.h b/libhugetlbfs_internal.h
index 97b19fa..ae8d7bc 100644
--- a/libhugetlbfs_internal.h
+++ b/libhugetlbfs_internal.h
@@ -63,6 +63,7 @@ struct libhugeopts_t {
bool shm_enabled;
bool no_reserve;
bool map_hugetlb;
+ bool thp_morecore;
unsigned long force_elfmap;
char *ld_preload;
char *elfmap;
diff --git a/morecore.c b/morecore.c
index 6b4364c..c02b11a 100644
--- a/morecore.c
+++ b/morecore.c
@@ -202,6 +202,73 @@ static void *hugetlbfs_morecore(ptrdiff_t increment)
return p;
}
+static void *thp_morecore(ptrdiff_t increment)
+{ /*
+ * Morecore hook that hugetlbfs_setup_morecore() installs in __morecore
+ * when __hugetlb_opts.thp_morecore is set. The heap is resized with
+ * sbrk() by an amount passed through ALIGN(delta, hpage_size), and
+ * newly added space is madvise()d with MADV_HUGEPAGE when that flag
+ * is defined at build time.
+ *
+ * increment: requested change in heap size, in bytes; a request that
+ * yields a negative delta shrinks the sbrk()-obtained region.
+ *
+ * Returns the value heaptop had before increment was applied.
+ * Returns NULL if sbrk() fails while growing, or if a shrink is
+ * requested while mapsize is zero. If sbrk() fails while shrinking,
+ * returns heaptop without changing it.
+ */
+ void *p;
+ long delta; /* change needed in the sbrk()-obtained region, in bytes */
+
+ INFO("thp_morecore(%ld) = ...\n", (long)increment);
+ /* Bytes in use after this request, minus what mapsize already covers */
+ delta = (heaptop - heapbase) + increment - mapsize;
+ delta = ALIGN(delta, hpage_size);
+
+ if (delta > 0) {
+ /*
+ * The first time we expand the mapping we need to account for
+ * the initial heap mapping not necessarily being huge page
+ * aligned
+ */
+ if (!mapsize)
+ delta = hugetlbfs_next_addr((long)heapbase + delta) -
+ (unsigned long)heapbase;
+
+ INFO("Adding %ld bytes to heap\n", delta);
+
+ p = sbrk(delta);
+ if (p == (void *)-1) {
+ WARNING("sbrk returned ENOMEM\n");
+ return NULL;
+ }
+
+ if (!mapsize) { /* first expansion: adopt the address sbrk() returned */
+ if (heapbase && (heapbase != p)) {
+ WARNING("Heap was expected at %p instead of %p, "
+ "heap has been modified by someone else!\n",
+ heapbase, p);
+ if (__hugetlbfs_debug)
+ dump_proc_pid_maps();
+ }
+ heapbase = heaptop = p; /* NOTE(review): delta above was computed from the old heapbase */
+ }
+
+ mapsize += delta;
+#ifdef MADV_HUGEPAGE
+ madvise(p, delta, MADV_HUGEPAGE); /* return value is not checked */
+#endif
+ } else if (delta < 0) {
+ /* shrinking the heap */
+ if (!mapsize) {
+ WARNING("Can't shrink an empty heap\n");
+ return NULL;
+ }
+
+ INFO("Attempting to shrink heap by %ld bytes with sbrk\n",
+ -delta);
+ p = sbrk(delta);
+ if (p == (void *)-1) {
+ WARNING("Unable to shrink heap\n");
+ return heaptop; /* heaptop and mapsize are left unchanged */
+ }
+
+ mapsize += delta; /* delta is negative here, so mapsize decreases */
+ }
+ /* Hand back the previous top and advance heaptop by the request */
+ p = heaptop;
+ heaptop += increment;
+ INFO("... = %p\n", p);
+ return p;
+}
+
void hugetlbfs_setup_morecore(void)
{
char *ep;
@@ -222,6 +289,8 @@ void hugetlbfs_setup_morecore(void)
*/
if (strncasecmp(__hugetlb_opts.morecore, "y", 1) == 0)
hpage_size = gethugepagesize();
+ else if (__hugetlb_opts.thp_morecore)
+ hpage_size = kernel_default_hugepage_size();
else
hpage_size = parse_page_size(__hugetlb_opts.morecore);
@@ -237,8 +306,12 @@ void hugetlbfs_setup_morecore(void)
return;
}
- if(__hugetlb_opts.map_hugetlb &&
- hpage_size == kernel_default_hugepage_size()) {
+ /*
+ * We won't need an fd for the heap mmaps if we are using MAP_HUGETLB
+ * or we are depending on transparent huge pages
+ */
+ if(__hugetlb_opts.thp_morecore || (__hugetlb_opts.map_hugetlb &&
+ hpage_size == kernel_default_hugepage_size())) {
heap_fd = -1;
} else {
if (!hugetlbfs_find_path_for_size(hpage_size)) {
@@ -253,7 +326,12 @@ void hugetlbfs_setup_morecore(void)
}
}
- if (__hugetlb_opts.heapbase) {
+ /*
+ * THP morecore uses sbrk to allocate more heap space, counting on the
+ * kernel to back the area with THP. So setting heapbase is
+ * meaningless if thp_morecore is used.
+ */
+ if (!__hugetlb_opts.thp_morecore && __hugetlb_opts.heapbase) {
heapaddr = strtoul(__hugetlb_opts.heapbase, &ep, 16);
if (*ep != '\0') {
WARNING("Can't parse HUGETLB_MORECORE_HEAPBASE: %s\n",
@@ -262,13 +340,17 @@ void hugetlbfs_setup_morecore(void)
}
} else {
heapaddr = (unsigned long)sbrk(0);
- heapaddr = hugetlbfs_next_addr(heapaddr);
+ if (!__hugetlb_opts.thp_morecore)
+ heapaddr = hugetlbfs_next_addr(heapaddr);
}
INFO("setup_morecore(): heapaddr = 0x%lx\n", heapaddr);
heaptop = heapbase = (void *)heapaddr;
- __morecore = &hugetlbfs_morecore;
+ if (__hugetlb_opts.thp_morecore)
+ __morecore = &thp_morecore;
+ else
+ __morecore = &hugetlbfs_morecore;
/* Set some allocator options more appropriate for hugepages */