Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct hardware-independent thread pinning #10

Open
schuay opened this issue Sep 29, 2015 · 1 comment
Open

Correct hardware-independent thread pinning #10

schuay opened this issue Sep 29, 2015 · 1 comment
Assignees

Comments

@schuay
Copy link

schuay commented Sep 29, 2015

Have you thought about using hwloc to do hardware-independent thread pinning? It would add a dependency on the one hand, but make things much easier for people running this lib on the other. It'd also allow you to remove the horrible the_cores hack ;)

A quick draft of the required changes below:

diff --git a/common/Makefile.common b/common/Makefile.common
index d8bba15..f51050b 100644
--- a/common/Makefile.common
+++ b/common/Makefile.common
@@ -248,7 +248,7 @@ CFLAGS += -Wall
 CFLAGS += -fno-strict-aliasing
 CFLAGS += -I$(LIBAO_INC) -I$(ROOT)/include -I$(LIBSSMEM)/include

-LDFLAGS += -lpthread -lrt -lm
+LDFLAGS += -lpthread -lrt -lm -lhwloc

 ######################
 # compilation settings
diff --git a/include/utils.h b/include/utils.h
index 609fe5a..c5e4f92 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -655,36 +655,25 @@ static __attribute__ ((unused)) double eng_per_test_iter_nj[40][5] =
     return (double)t.tv_sec + ((double)t.tv_usec)/1000000.0;
   }

+#include <hwloc.h>
+  hwloc_topology_t m_topology;
+
   static inline 
   void set_cpu(int cpu) 
   {
-#ifndef NO_SET_CPU
-#  ifdef __sparc__
-    processor_bind(P_LWPID,P_MYID, cpu, NULL);
-#  elif defined(__tile__)
-    if (cpu>=tmc_cpus_grid_total()) {
-      perror("Thread id too high");
+    const int depth = hwloc_get_type_or_below_depth(m_topology, HWLOC_OBJ_CORE);
+    const int ncores = hwloc_get_nbobjs_by_depth(m_topology, depth);
+
+    const hwloc_obj_t obj = hwloc_get_obj_by_depth(m_topology, depth, cpu % ncores);
+
+    hwloc_cpuset_t cpuset = hwloc_bitmap_dup(obj->cpuset);
+    hwloc_bitmap_singlify(cpuset);
+
+    if (hwloc_set_cpubind(m_topology, cpuset, HWLOC_CPUBIND_THREAD) != 0) {
+        fprintf(stderr, "Could not bind to core: %s\n", strerror(errno));
     }
-    // cput_set_t cpus;
-    if (tmc_cpus_set_my_cpu(cpu)<0) {
-      tmc_task_die("tmc_cpus_set_my_cpu() failed."); 
-    }    
-#  else
-    cpu %= (NUMBER_OF_SOCKETS * CORES_PER_SOCKET);
-
-    cpu_set_t mask;
-    CPU_ZERO(&mask);
-    CPU_SET(cpu, &mask);
-#    if defined(PLATFORM_NUMA)
-    numa_set_preferred(get_cluster(cpu));
-#    endif
-    pthread_t thread = pthread_self();
-    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &mask) != 0) 
-      {
-   fprintf(stderr, "Error setting thread affinity\n");
-      }
-#  endif
-#endif
+
+    hwloc_bitmap_free(cpuset);    
   }


diff --git a/src/priorityqueue-alistarh/test_simple.c b/src/priorityqueue-alistarh/test_simple.c
index 03099f7..5c1ebdc 100755
--- a/src/priorityqueue-alistarh/test_simple.c
+++ b/src/priorityqueue-alistarh/test_simple.c
@@ -130,7 +130,7 @@ test(void* thread)
   thread_data_t* td = (thread_data_t*) thread;
   uint32_t ID = td->id;
   int phys_id = the_cores[ID];
-  set_cpu(phys_id);
+  set_cpu(ID);
   ssalloc_init();

   DS_TYPE* set = td->set;
@@ -295,10 +295,16 @@ test(void* thread)
   pthread_exit(NULL);
 }

+#include <hwloc.h>
+extern hwloc_topology_t m_topology;
+
 int
 main(int argc, char **argv) 
 {
-  set_cpu(the_cores[0]);
+  hwloc_topology_init(&m_topology);
+  hwloc_topology_load(m_topology);
+
+  set_cpu(0);
   ssalloc_init();
   seeds = seed_rand();

@@ -398,8 +404,6 @@ main(int argc, char **argv)
    }
     }

-
-
   if (!is_power_of_two(initial))
     {
       size_t initial_pow2 = pow2roundup(initial);
@@ -608,6 +612,8 @@ main(int argc, char **argv)
   RR_PRINT_UNPROTECTED(RAPL_PRINT_POW);
   RR_PRINT_CORRECTED();
   RETRY_STATS_PRINT(total, putting_count_total, removing_count_total, putting_count_total_succ + removing_count_total_succ);
+
+  hwloc_topology_destroy(m_topology);

   pthread_exit(NULL);

@trigonak trigonak self-assigned this Sep 30, 2015
@trigonak
Copy link
Member

That is a good idea. We will integrate it as an option as soon as possible.
Thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants