aboutsummaryrefslogtreecommitdiff
path: root/kernel-features.c
diff options
context:
space:
mode:
authorAdam Litke <agl@us.ibm.com>2008-08-05 20:48:21 +0000
committerAdam Litke <agl@us.ibm.com>2008-08-05 20:48:21 +0000
commitf9456b5f72ce0310884084c9e25c4cbfb4f57ca2 (patch)
tree5769cf28802db454b0d85de532d8ee746823f390 /kernel-features.c
parent56f74e9b51f9bce83cb18e0c9a9f82bbe6da8ead (diff)
downloadlibhugetlbfs-f9456b5f72ce0310884084c9e25c4cbfb4f57ca2.tar.gz
[RFC] Use the kernel version number to identify kernel functionality V2
Historically, libhugetlbfs has relied on kernel features that either: have been known to exist in all supported kernel versions, or are easily detected. As of kernel version 2.6.27-rc1, a new crucial feature has been added that is not possible to reliably detect. Huge page mappings created with the MAP_PRIVATE flag will have huge pages reserved up-front. With private reservations in effect, it is safe to allow demand-faulting of the HUGETLB_MORECORE heap which can lead to dramatic performance improvements on NUMA systems. This is only safe behavior in the presence of private reservations. The only way to identify that a kernel has private reservations support is to examine the kernel version to see if it is more recent than when the feature appeared. I am well aware of the drawbacks of using the kernel version to affect library behavior but I don't see any alternative. I would suggest that the kernel version should be used only in cases when there is no alternative. How it works ============ Kernels are assumed to have a mandatory base version x.y.z (e.g. 2.6.17) and one optional modifier: a post version (stable tree x.y.z.q) or a pre version (x.y.z-{preN|rcN}). All other version appendices (such as -mmN) are ignored. The following ordering rules apply: x.y.z-rc(N) < x.y.z-rc(N+1) < x.y.z < x.y.z.(N) < x.y.z.(N+1) When libhugetlbfs initializes, the running kernel version is probed using uname. A list of feature definitions is scanned and those with a minimum kernel version have that version compared to the running kernel. If the running kernel is found to be equal to or greater than the minimum required kernel version, a bit in a feature mask is set to indicate the presence of the feature. A feature can be later checked for by using a simple function that checks the bitmask. Changes since V1 (Thanks Andy Whitcroft and Mel Gorman): - Fixed feature_mask handling - Readability improvements
Diffstat (limited to 'kernel-features.c')
-rw-r--r--kernel-features.c181
1 files changed, 181 insertions, 0 deletions
diff --git a/kernel-features.c b/kernel-features.c
new file mode 100644
index 0000000..1b4508c
--- /dev/null
+++ b/kernel-features.c
@@ -0,0 +1,181 @@
+/*
+ * libhugetlbfs - Easy use of Linux hugepages
+ * Copyright (C) 2008 Adam Litke, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/utsname.h>
+#include "kernel-features.h"
+#include "hugetlbfs.h"
+#include "libhugetlbfs_internal.h"
+#include "libhugetlbfs_debug.h"
+
+/*
+ * Parsed version of the running kernel.  Filled in by __lh_setup_features()
+ * from the release string reported by uname().
+ */
+static struct kernel_version running_kernel_version;
+
+/*
+ * Bitmask of detected kernel features: bit N is set by __lh_setup_features()
+ * when the feature with code N is present, and is tested by
+ * hugetlbfs_test_feature().
+ * This mask should always be 32 bits, regardless of the platform word size
+ */
+static unsigned int feature_mask;
+
+/*
+ * Table of known features, indexed by feature code.  required_version is a
+ * kernel version string in the format understood by str_to_ver(); the
+ * feature is considered present when the running kernel is at least that
+ * version.
+ */
+static struct feature kernel_features[] = {
+ [HUGETLB_FEATURE_PRIVATE_RESV] = {
+ .name = "private_reservations",
+ .required_version = "2.6.27-rc1",
+ },
+};
+
+/*
+ * Log the parsed version of the running kernel at debug level.  The base
+ * x.y.z triple is printed first, then the post-release number if one is
+ * set, otherwise the pre-release number if one is set.
+ */
+static void debug_kernel_version(void)
+{
+	const struct kernel_version *kv = &running_kernel_version;
+
+	DEBUG("Parsed kernel version: [%u] . [%u] . [%u] ",
+		kv->major, kv->minor, kv->release);
+
+	/* A post-release number takes priority over a pre-release number */
+	if (kv->post) {
+		DEBUG_CONT(" [post-release: %u]\n", kv->post);
+		return;
+	}
+
+	if (kv->pre) {
+		DEBUG_CONT(" [pre-release: %u]\n", kv->pre);
+		return;
+	}
+
+	/* Plain base version: just terminate the line */
+	DEBUG_CONT("\n");
+}
+
+/*
+ * Parse a kernel version string into *ver.
+ *
+ * The string must start with a base version "x.y.z".  It may be followed by
+ * exactly one recognised modifier:
+ *   - a post/stable number ".N"        -> stored in ver->post
+ *   - a pre-release tag "-preN"/"-rcN" -> stored in ver->pre
+ * Any other suffix (such as "-mmN") is ignored and the version is treated
+ * as equal to the base version.
+ *
+ * Returns 0 on success.  Returns -1 if no x.y.z base version could be read,
+ * in which case the contents of *ver must not be relied upon.
+ */
+static int str_to_ver(const char *str, struct kernel_version *ver)
+{
+	int err;
+	int nr_chars = 0;
+	char extra[4] = "";
+
+	/* Clear out version struct */
+	ver->major = ver->minor = ver->release = ver->post = ver->pre = 0;
+
+	/* The kernel always starts x.y.z */
+	err = sscanf(str, "%u.%u.%u%n", &ver->major, &ver->minor, &ver->release,
+			&nr_chars);
+	/*
+	 * The sscanf man page says that %n may or may not affect the return
+	 * value so make sure it is at least 3 to cover the three kernel
+	 * version variables and assume nr_chars will be correctly assigned.
+	 */
+	if (err < 3) {
+		/*
+		 * A matching failure in sscanf does not set errno, so report
+		 * the string that could not be parsed rather than
+		 * strerror(errno).
+		 */
+		ERROR("Unable to determine base kernel version: %s\n", str);
+		return -1;
+	}
+
+	/* Advance the str by the number of characters indicated by sscanf */
+	str += nr_chars;
+
+	/* Try to match a post/stable version */
+	err = sscanf(str, ".%u", &ver->post);
+	if (err == 1)
+		return 0;
+
+	/*
+	 * Try to match a preN/rcN version.  The tag is limited to 3 characters
+	 * so that it always fits in extra[] together with its terminator.
+	 */
+	err = sscanf(str, "-%3[^0-9]%u", extra, &ver->pre);
+	if (err != 2 || (strcmp(extra, "pre") != 0 && strcmp(extra, "rc") != 0))
+		ver->pre = 0;
+
+	/*
+	 * For now we ignore any extraversions besides pre and post versions
+	 * and treat them as equal to the base version.
+	 */
+	return 0;
+}
+
+/*
+ * Three-way comparison of two integers.
+ * Returns -1 if a < b, 1 if a > b and 0 if they are equal.
+ */
+static int int_cmp(int a, int b)
+{
+	if (a < b)
+		return -1;
+	if (a > b)
+		return 1;
+	return 0;
+}
+
+/*
+ * Compare two parsed kernel versions.
+ * Returns a negative value if a is older than b, a positive value if a is
+ * newer than b, and 0 if they are considered equal.
+ *
+ * Versions are ordered by their base x.y.z first.  For the same base the
+ * following rules apply:
+ *	X.Y.Z-rcN < X.Y.Z-rc(N+1) < X.Y.Z < X.Y.Z.N < X.Y.Z.(N+1)
+ * Consequently every X.Y.(Z - 1) kernel, including its stable updates,
+ * sorts before X.Y.Z-rcN.  The pre-release modifier is handled as an
+ * explicit tie-break rather than by decrementing the release number,
+ * because the latter would make X.Y.Z-rcN tie with X.Y.(Z - 1) and lose
+ * against X.Y.(Z - 1).N.
+ */
+static int ver_cmp(struct kernel_version *a, struct kernel_version *b)
+{
+	int ret;
+
+	if ((ret = int_cmp(a->major, b->major)) != 0)
+		return ret;
+
+	if ((ret = int_cmp(a->minor, b->minor)) != 0)
+		return ret;
+
+	if ((ret = int_cmp(a->release, b->release)) != 0)
+		return ret;
+
+	/* Same base version: a pre-release sorts before anything else */
+	if (a->pre || b->pre) {
+		if (!a->pre)
+			return 1;
+		if (!b->pre)
+			return -1;
+		return int_cmp(a->pre, b->pre);
+	}
+
+	/*
+	 * Neither is a pre-release: the base version (post == 0) sorts before
+	 * its stable updates.  We ignore forks (such as -mm and -mjb).
+	 */
+	return int_cmp(a->post, b->post);
+}
+
+/*
+ * Report whether a kernel feature was detected by __lh_setup_features().
+ *
+ * feature_code: one of the HUGETLB_FEATURE_* codes, in the range
+ *               [0, HUGETLB_FEATURE_NR).
+ *
+ * Returns a positive (non-zero) value if the feature is present, 0 if it is
+ * not, and -EINVAL if feature_code is out of range.  Callers that need to
+ * tell an invalid code from a present feature must check for a negative
+ * return value.
+ */
+int hugetlbfs_test_feature(int feature_code)
+{
+	/* Reject negative codes too: shifting by a negative count is undefined */
+	if (feature_code < 0 || feature_code >= HUGETLB_FEATURE_NR) {
+		ERROR("hugetlbfs_test_feature: invalid feature code\n");
+		return -EINVAL;
+	}
+
+	/* Unsigned shift to match the unsigned feature_mask */
+	return feature_mask & (1U << feature_code);
+}
+
+/*
+ * Probe the running kernel version with uname() and set a bit in
+ * feature_mask for every entry of kernel_features[] whose required version
+ * is not newer than the running kernel.
+ *
+ * If the running kernel version cannot be obtained or parsed, feature_mask
+ * is left untouched, so no version-dependent feature is reported present.
+ */
+void __lh_setup_features()
+{
+	struct utsname u;
+	int i;
+
+	if (uname(&u)) {
+		ERROR("Getting kernel version failed: %s\n", strerror(errno));
+		return;
+	}
+
+	/* str_to_ver() has already logged the failure; nothing to compare */
+	if (str_to_ver(u.release, &running_kernel_version))
+		return;
+	debug_kernel_version();
+
+	for (i = 0; i < HUGETLB_FEATURE_NR; i++) {
+		const char *required = kernel_features[i].required_version;
+		struct kernel_version ver;
+
+		/* Skip entries that have no (parsable) minimum version */
+		if (!required || str_to_ver(required, &ver))
+			continue;
+
+		/* Is the running kernel at least the required version? */
+		if (ver_cmp(&running_kernel_version, &ver) >= 0) {
+			DEBUG("Feature %s is present in this kernel\n",
+				kernel_features[i].name);
+			feature_mask |= (1U << i);
+		}
+	}
+}