aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNishanth Aravamudan <nacc@us.ibm.com>2008-04-09 11:08:58 -0700
committerNishanth Aravamudan <nacc@us.ibm.com>2008-04-10 20:30:26 -0700
commitfcbcd8d6681e5b62e526f980e20ad364d77d8744 (patch)
tree41388255383c94538f62fe53e3d58f9f8c5de613
parentb22579d338d7d351b8d3f829adc92f7081a4f100 (diff)
downloadlibhugetlbfs-fcbcd8d6681e5b62e526f980e20ad364d77d8744.tar.gz
morecore: add HUGETLB_NO_PREFAULT [1.3-pre1]
Add yet another environment variable, this time to prevent the prefaulting of hugepages via mlock() in morecore.c. Prefaulting in an unintelligent way (interleave across all nodes, e.g.) when an application is threaded/scheduled in an intelligent way (OpenMP) can result in very low performance. As long as enough hugepages are available for the system as a whole, there should be little difference between HUGETLB_NO_PREFAULT set and unset. However, if hugepages are a contested resource, applications may see SIGKILL if HUGETLB_NO_PREFAULT is set. Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
-rw-r--r--HOWTO22
-rw-r--r--debug.c5
-rw-r--r--libhugetlbfs_internal.h1
-rw-r--r--morecore.c30
4 files changed, 44 insertions, 14 deletions
diff --git a/HOWTO b/HOWTO
index 73f746d..c6c2fb3 100644
--- a/HOWTO
+++ b/HOWTO
@@ -247,6 +247,27 @@ segment); for 64-bit PowerPC binaries the address is rounded-up to a
multiple of 1TB. On all other platforms the address is rounded-up to
the size of a hugepage.
+By default, the hugepage heap will be prefaulted by libhugetlbfs to
+guarantee enough hugepages exist and are reserved for the application
+(if this were not done, applications could receive a SIGKILL signal if
+hugepages needed for the heap are used by another application before
+they are faulted in). This leads to local-node allocations when no
+memory policy is in place for hugepages. Therefore, it is recommended to
+use
+
+ $ numactl --interleave=all <your app command line>
+
+to regain some of the performance lost to local-node allocations on
+large NUMA systems. This can still result in poor performance for those
+applications which carefully place their threads on particular nodes
+(such as by using OpenMP). In that case, thread-local allocation is
+preferred. Users can specify HUGETLB_NO_PREFAULT to prevent the
+prefaulting of hugepages and instead rely on run-time faulting of
+hugepages. NOTE: specifying HUGETLB_NO_PREFAULT on a system where
+hugepages are available to and used by many processes can result in some
+applications receiving SIGKILL, so its use is not recommended in
+high-availability or production environments.
+
Using hugepage text, data, or BSS
---------------------------------
@@ -350,6 +371,7 @@ libhugetlbfs:
HUGETLB_MORECORE
HUGETLB_MORECORE_HEAPBASE
+ HUGETLB_NO_PREFAULT
Explained in "Using hugepages for malloc()
(morecore)"
diff --git a/debug.c b/debug.c
index 12b4de2..a2174f8 100644
--- a/debug.c
+++ b/debug.c
@@ -29,6 +29,7 @@
int __hugetlbfs_verbose = 1;
int __hugetlbfs_debug = 0;
+int __hugetlbfs_prefault = 1;
char __hugetlbfs_hostname[64];
static int initialized;
@@ -48,6 +49,10 @@ static void __hugetlbfs_init_debug(void)
if (env)
__hugetlbfs_debug = 1;
+ env = getenv("HUGETLB_NO_PREFAULT");
+ if (env)
+ __hugetlbfs_prefault = 0;
+
gethostname(__hugetlbfs_hostname, sizeof(__hugetlbfs_hostname)-1);
initialized = 1;
diff --git a/libhugetlbfs_internal.h b/libhugetlbfs_internal.h
index aedf738..b515ee5 100644
--- a/libhugetlbfs_internal.h
+++ b/libhugetlbfs_internal.h
@@ -42,6 +42,7 @@
extern int __hugetlbfs_verbose;
extern int __hugetlbfs_debug;
+extern int __hugetlbfs_prefault;
extern void __hugetlbfs_setup_elflink();
extern void __hugetlbfs_setup_morecore();
extern void __hugetlbfs_setup_debug();
diff --git a/morecore.c b/morecore.c
index 80c9898..11df17d 100644
--- a/morecore.c
+++ b/morecore.c
@@ -141,20 +141,22 @@ static void *hugetlbfs_morecore(ptrdiff_t increment)
* process tried to access the missing memory.
*/
- for (offset = 0; offset < delta; ) {
- for (i = 0; i < IOV_LEN && offset < delta; i++) {
- iov[i].iov_base = p + offset;
- iov[i].iov_len = 1;
- offset += blocksize;
- }
- ret = readv(zero_fd, iov, i);
- if (ret != i) {
- DEBUG("Got %d of %d requested; err=%d\n", ret,
- i, ret < 0 ? errno : 0);
- WARNING("Failed to reserve huge pages in "
- "hugetlbfs_morecore()\n");
- munmap(p, delta);
- return NULL;
+ if (__hugetlbfs_prefault) {
+ for (offset = 0; offset < delta; ) {
+ for (i = 0; i < IOV_LEN && offset < delta; i++) {
+ iov[i].iov_base = p + offset;
+ iov[i].iov_len = 1;
+ offset += blocksize;
+ }
+ ret = readv(zero_fd, iov, i);
+ if (ret != i) {
+ DEBUG("Got %d of %d requested; err=%d\n", ret,
+ i, ret < 0 ? errno : 0);
+ WARNING("Failed to reserve huge pages in "
+ "hugetlbfs_morecore()\n");
+ munmap(p, delta);
+ return NULL;
+ }
}
}