Index: src/libbu/CMakeLists.txt
===================================================================
--- src/libbu/CMakeLists.txt	(revision 70457)
+++ src/libbu/CMakeLists.txt	(working copy)
@@ -42,6 +42,9 @@
   set_property(SOURCE y2038/time64.c APPEND PROPERTY COMPILE_FLAGS "-fPIC")
 endif(HAVE_FPIC)

+# We need C++11 support.
+set(CMAKE_CXX_STANDARD 11)
+
 set(LIBBU_SOURCES
   y2038/time64.c
   affinity.c
@@ -94,6 +97,7 @@
   mread.c
   observer.c
   opt.c
+  parallel_cpp11thread.cpp
   parallel.c
   parse.c
   path.c
Index: src/libbu/parallel.c
===================================================================
--- src/libbu/parallel.c	(revision 70457)
+++ src/libbu/parallel.c	(working copy)
@@ -452,11 +452,11 @@
 void
 bu_parallel(void (*func)(int, void *), size_t ncpu, void *arg)
 {
+    if (!func)
+    return; /* nothing to do */
+
 #ifndef PARALLEL

-    if (!func)
-	return; /* nothing to do */
-
     bu_log("bu_parallel(%zu., %p):  Not compiled for PARALLEL machine, running single-threaded\n", ncpu, arg);
     /* do the work anyways */
     (*func)(0, arg);
@@ -463,6 +463,24 @@

 #else

+    if (UNLIKELY(bu_debug & BU_DEBUG_PARALLEL))
+	bu_log("bu_parallel(%zu, %p)\n", ncpu, arg);
+
+    if (ncpu > MAX_PSW) {
+	bu_log("WARNING: bu_parallel() ncpu(%zu) > MAX_PSW(%d), adjusting ncpu\n", ncpu, MAX_PSW);
+	ncpu = MAX_PSW;
+    }
+
+#if HAVE_THREAD_LOCAL
+
+    /* one worker: run it inline as id 0 and do not also hand it to bu_parallel_cpp() */
+    if (ncpu == 1)
+	(*func)(0, arg);
+    else
+	bu_parallel_cpp(func, ncpu, arg);
+
+#else
+
     struct thread_data *thread_context;
     rt_thread_t thread_tbl[MAX_PSW];
     size_t avail_cpus = 1;
@@ -485,17 +503,6 @@

     rt_thread_t thread;

-    if (!func)
-	return; /* nothing to do */
-
-    if (UNLIKELY(bu_debug & BU_DEBUG_PARALLEL))
-	bu_log("bu_parallel(%zu, %p)\n", ncpu, arg);
-
-    if (ncpu > MAX_PSW) {
-	bu_log("WARNING: bu_parallel() ncpu(%zd) > MAX_PSW(%d), adjusting ncpu\n", ncpu, MAX_PSW);
-	ncpu = MAX_PSW;
-    }
-
     libbu_affinity = getenv("LIBBU_AFFINITY");
     if (libbu_affinity)
 	affinity = (int)strtol(libbu_affinity, NULL, 0x10);
@@ -511,7 +518,7 @@
 	/* otherwise, limit ourselves to what is actually available */
 	avail_cpus = bu_avail_cpus();
 	if (ncpu > avail_cpus) {
-	    bu_log("%zd cpus requested, but only %d available\n", ncpu, avail_cpus);
+	    bu_log("%zu cpus requested, but only %zu available\n", ncpu, avail_cpus);
 	    ncpu = avail_cpus;
 	}
     }
@@ -604,7 +611,7 @@

     if (UNLIKELY(bu_debug & BU_DEBUG_PARALLEL))
 	for (i = 0; i < nthreadc; i++)
-	    bu_log("bu_parallel(): thread_tbl[%d] = 0x%x\n", i, thread_tbl[i]);
+	    bu_log("bu_parallel(): thread_tbl[%zu] = 0x%x\n", i, thread_tbl[i]);

     /*
      * Wait for completion of all threads.  We don't wait for threads
@@ -696,12 +703,12 @@
 	int ret;

 	if (UNLIKELY(bu_debug & BU_DEBUG_PARALLEL))
-	    bu_log("bu_parallel(): waiting for thread %p to complete:\t(loop:%d) (nthreadc:%zu) (nthreade:%zu)\n",
+	    bu_log("bu_parallel(): waiting for thread %p to complete:\t(loop:%zu) (nthreadc:%zu) (nthreade:%zu)\n",
 		   (void *)thread_tbl[x], x, nthreadc, nthreade);

 	if ((ret = pthread_join(thread_tbl[x], NULL)) != 0) {
 	    /* badness happened */
-	    bu_log("pthread_join(thread_tbl[%d]=%p) ret=%d\n", x, (void *)thread_tbl[x], ret);
+	    bu_log("pthread_join(thread_tbl[%zu]=%p) ret=%d\n", x, (void *)thread_tbl[x], ret);
 	}

 	nthreade++;
@@ -772,6 +779,7 @@

     bu_free(thread_context, "struct thread_data *thread_context");

+#endif /* HAVE_THREAD_LOCAL */
 #endif /* PARALLEL */

     return;
Index: src/libbu/parallel.h
===================================================================
--- src/libbu/parallel.h	(revision 70457)
+++ src/libbu/parallel.h	(working copy)
@@ -36,6 +36,12 @@
 extern void thread_set_cpu(int cpu);
 extern int thread_get_cpu(void);

+/**
+ * Use C++11's std::thread for parallelization.
+ */
+extern void bu_parallel_cpp(void (*func)(int, void *), size_t ncpu, void *arg);
+
+
 #endif /* LIBBU_PARALLEL_H */

 /*
Index: src/libbu/parallel_cpp11thread.cpp
===================================================================
--- src/libbu/parallel_cpp11thread.cpp	(nonexistent)
+++ src/libbu/parallel_cpp11thread.cpp	(working copy)
@@ -0,0 +1,58 @@
+/*          P A R A L L E L _ C P P 1 1 T H R E A D . C P P
+ * BRL-CAD
+ *
+ * Copyright (c) 2013-2016 United States Government as represented by
+ * the U.S. Army Research Laboratory.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this file; see the file named COPYING for more
+ * information.
+ */
+
+#include <thread>
+#include <vector>
+#include <stddef.h>
+
+/* Run func on ncpu std::thread workers (0 = autodetect) and wait for all
+ * of them; each worker receives its own id in [0, ncpu) and arg.  Has C
+ * linkage because it is declared in parallel.h and called from parallel.c. */
+extern "C" void bu_parallel_cpp(void (*func)(int, void *), size_t ncpu, void *arg)
+{
+    std::vector<std::thread> threads;
+
+    if (!ncpu) {
+	ncpu = std::thread::hardware_concurrency();
+	/* Still 0: thread support is absent or unknown to the
+	 * implementation, so run once in the calling thread. */
+	if (!ncpu)
+	    return func(0, arg);
+    }
+
+    /* Create and run threads, numbering them 0 .. ncpu-1. */
+    threads.reserve(ncpu);
+    for (std::size_t i = 0; i < ncpu; ++i)
+	threads.emplace_back(func, static_cast<int>(i), arg);
+
+    /* Wait for the parallel task to complete. */
+    for (std::thread &worker : threads)
+	worker.join();
+}
+
+/*
+ * Local Variables:
+ * mode: C++
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * c-file-style: "stroustrup"
+ * End:
+ * ex: shiftwidth=4 tabstop=8
+ */