assign_gpu_to_rank.hpp

#pragma once

#include <cstdio>   // printf
#include <cstdlib>  // malloc, free, qsort, exit
#include <cstring>  // strcmp, strcpy

#ifdef __WITH_GPU
#include <cuda_runtime.h>
#include <cuda.h>
#endif
#ifdef __WITH_MPI
#include <mpi.h>
#endif
// Book-keeping shared with the code that includes this header: node-local
// rank, number of ranks on the node, and the CUDA device bound to this rank.
static int first_time = 1;
static int myrank = 0;
static int gpu_per_node = 0;
static int SM_COUNT = 1;
static int mydev;
#ifdef __WITH_MPI
static char host_name[MPI_MAX_PROCESSOR_NAME];
#else
static char host_name[20];
#endif
// Comparison callback for qsort over fixed-size host-name strings.
static int stringCmp(void const *a, void const *b)
{
    return strcmp((char const *) a, (char const *) b);
}
// Binds the calling rank to one GPU on its node: ranks are grouped by host
// name, given a node-local rank, and the k-th rank on a node gets the k-th
// detected device. The body assumes a CUDA-enabled build, since the CUDA
// runtime calls below are not guarded by __WITH_GPU.
static void
assign_gpu_to_local_rank()
{
#ifdef __WITH_MPI
    char (*host_names)[MPI_MAX_PROCESSOR_NAME];
    MPI_Comm nodeComm;
#endif
    //
    int n, namelen, color, rank = 0, nprocs = 1;
    size_t bytes;
    int dev;
    struct cudaDeviceProp deviceProp;
//
#ifdef __WITH_MPI
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Get_processor_name(host_name, &namelen);
    if (rank == 0) printf("number of ranks = %d\n", nprocs);

    // Gather every rank's host name, then sort the list so that ranks on the
    // same node map to the same "color".
    bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
    host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
    strcpy(host_names[rank], host_name);
    for (n = 0; n < nprocs; n++)
    {
        MPI_Bcast(host_names[n], MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n, MPI_COMM_WORLD);
    }
    qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);

    // The color is the index of this rank's node in the sorted host-name list.
    color = 0;
    for (n = 0; n < nprocs; n++)
    {
        if (n > 0 && strcmp(host_names[n - 1], host_names[n])) color++;
        if (strcmp(host_name, host_names[n]) == 0) break;
    }

    // Split the world communicator into one communicator per node; myrank is
    // the node-local rank and gpu_per_node the number of ranks on this node.
    MPI_Comm_split(MPI_COMM_WORLD, color, 0, &nodeComm);
    MPI_Comm_rank(nodeComm, &myrank);
    MPI_Comm_size(nodeComm, &gpu_per_node);
    free(host_names);
#else
    myrank = 0;
#endif
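    /*
     * Note (an aside, not part of the original file): on MPI-3 implementations
     * the same node-local communicator can be obtained without the host-name
     * sort, e.g.
     *
     *     MPI_Comm nodeComm;
     *     MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
     *                         MPI_INFO_NULL, &nodeComm);
     *     MPI_Comm_rank(nodeComm, &myrank);
     *     MPI_Comm_size(nodeComm, &gpu_per_node);
     *
     * This is only a sketch of an alternative; the code above keeps the
     * host-name based grouping.
     */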
    // Enumerate the CUDA devices visible on this node. Every device whose
    // properties can be queried is accepted; devloc[k] is the device handed
    // to the k-th rank on the node.
    int deviceCount, slot = 0;
    int *devloc;
    cudaGetDeviceCount(&deviceCount);
    devloc = (int *) malloc((deviceCount > 0 ? deviceCount : 1) * sizeof(int));
    devloc[0] = 999;   // sentinel in case no device is found
    for (dev = 0; dev < deviceCount; ++dev)
    {
        if (cudaGetDeviceProperties(&deviceProp, dev) == cudaSuccess)
        {
            devloc[slot] = dev;
            slot++;
        }
    }
    // Abort the whole job if any node has fewer GPUs than ranks placed on it.
    int gpu_count_err = 0, global_gpu_count_err = 0;
    if (slot < gpu_per_node)
    {
        if (myrank == 0)
            printf("!!! ERROR: Not enough GPUs on node %s, %d GPUs found, %d GPUs required !!!\n",
                   host_name, slot, gpu_per_node);
        gpu_count_err = 1;
    }
#ifdef __WITH_MPI
    MPI_Allreduce(&gpu_count_err, &global_gpu_count_err, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    global_gpu_count_err = gpu_count_err;
#endif
    if (global_gpu_count_err > 0)
    {
#ifdef __WITH_MPI
        MPI_Finalize();
#endif
        exit(1);
    }
printf ("rank %d Assigning device %d to process on node %s \n", rank, devloc[myrank], host_name );
cudaSetDevice(devloc[myrank]);
mydev = devloc[myrank];
}
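
/*
 * Usage sketch (an assumption, not spelled out in the original file): the
 * helper is meant to be called once per rank, after MPI_Init and before any
 * CUDA work, for example
 *
 *     #include "assign_gpu_to_rank.hpp"
 *
 *     int main(int argc, char **argv)
 *     {
 *         MPI_Init(&argc, &argv);
 *         assign_gpu_to_local_rank();   // binds this rank to devloc[myrank]
 *         // ... kernels / library calls now run on the selected device ...
 *         MPI_Finalize();
 *         return 0;
 *     }
 */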
