assign_gpu_to_rank.hpp (R1448 Lenstool-HPC)
#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __WITH_GPU
#include <cuda_runtime.h>
#include <cuda.h>
#endif
#ifdef __WITH_MPI
#include <mpi.h>
#endif

static int first_time   = 1;
static int myrank       = 0;
static int gpu_per_node = 0;
static int SM_COUNT     = 1;
static int mydev;
#ifdef __WITH_MPI
static char host_name[MPI_MAX_PROCESSOR_NAME];
#else
static char host_name[20];
#endif

// Comparison callback for qsort: orders host names lexicographically.
static int stringCmp(void const *a, void const *b)
{
    return strcmp((const char *) a, (const char *) b);
}

// Assigns one GPU to each MPI rank on a node: ranks that share a host name
// are grouped into a node-local communicator, and each rank selects the CUDA
// device whose index matches its node-local rank.
void
assign_gpu_to_local_rank()
{
#ifdef __WITH_MPI
    char (*host_names)[MPI_MAX_PROCESSOR_NAME];
    MPI_Comm nodeComm;
#endif
    int n, namelen, color, rank = 0, nprocs = 1;
    size_t bytes;
    int dev;
    struct cudaDeviceProp deviceProp;

#ifdef __WITH_MPI
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Get_processor_name(host_name, &namelen);
    printf("number of ranks = %d\n", nprocs);

    // Gather every rank's host name, then sort so identical names are adjacent.
    bytes      = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
    host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
    strcpy(host_names[rank], host_name);
    for (n = 0; n < nprocs; n++)
    {
        MPI_Bcast(host_names[n], MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n, MPI_COMM_WORLD);
    }
    qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);

    // Ranks on the same host get the same color, hence the same node-local
    // communicator; myrank becomes the rank's index within its node.
    color = 0;
    for (n = 0; n < nprocs; n++)
    {
        if (n > 0 && strcmp(host_names[n - 1], host_names[n])) color++;
        if (strcmp(host_name, host_names[n]) == 0) break;
    }
    MPI_Comm_split(MPI_COMM_WORLD, color, 0, &nodeComm);
    MPI_Comm_rank(nodeComm, &myrank);
    MPI_Comm_size(nodeComm, &gpu_per_node);
    free(host_names);
#else
    myrank = 0;
#endif

    // Enumerate the CUDA devices visible on this node; every device found is
    // recorded in devloc in enumeration order.
    int deviceCount, slot = 0;
    int *devloc;
    cudaGetDeviceCount(&deviceCount);
    devloc    = (int *) malloc(deviceCount * sizeof(int));
    devloc[0] = 999;
    for (dev = 0; dev < deviceCount; ++dev)
    {
        cudaGetDeviceProperties(&deviceProp, dev);
        devloc[slot] = dev;
        slot++;
    }

    // Abort the whole job if any node has fewer GPUs than node-local ranks.
    int gpu_count_err = 0, global_gpu_count_err = 0;
    if (slot < gpu_per_node)
    {
        if (myrank == 0) printf("!!! ERROR: Not enough GPUs on node %s, %d GPUs found, %d GPUs required !!!\n", host_name, slot, gpu_per_node);
        gpu_count_err = 1;
    }
#ifdef __WITH_MPI
    MPI_Allreduce(&gpu_count_err, &global_gpu_count_err, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    global_gpu_count_err = gpu_count_err;
#endif
    if (global_gpu_count_err > 0)
    {
#ifdef __WITH_MPI
        MPI_Finalize();
#endif
        exit(1);
    }

    printf("rank %d Assigning device %d to process on node %s \n", rank, devloc[myrank], host_name);
    cudaSetDevice(devloc[myrank]);
    mydev = devloc[myrank];
    free(devloc);
}
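
Below is a minimal usage sketch, not taken from the repository: it assumes the header is included from a translation unit built with -D__WITH_GPU and -D__WITH_MPI and linked against the CUDA runtime and an MPI implementation. The only ordering requirement the header imposes is that MPI_Init runs before assign_gpu_to_local_rank(), which in turn calls cudaSetDevice().

// usage_sketch.cpp (illustrative name)
// Build, under the assumptions above, e.g. with:
//   mpicxx -D__WITH_GPU -D__WITH_MPI usage_sketch.cpp -lcudart
#include <mpi.h>
#include "assign_gpu_to_rank.hpp"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    // Each rank binds to the GPU matching its node-local rank; the chosen
    // device index is left in the file-static variable mydev.
    assign_gpu_to_local_rank();

    // ... launch kernels / allocate device memory on the selected GPU ...

    MPI_Finalize();
    return 0;
}

The host-name sort followed by MPI_Comm_split gives every rank a node-local index, so the rank-to-GPU mapping reduces to devloc[myrank]; this relies on the job launcher placing no more ranks per node than there are GPUs, which the collective error check above enforces.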