assign_gpu_to_rank.hpp (R1448 Lenstool-HPC)
#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __WITH_GPU
#include <cuda_runtime.h>
#include <cuda.h>
#endif
#ifdef __WITH_MPI
#include <mpi.h>
#endif

static int first_time   = 1;
static int myrank       = 0;
static int gpu_per_node = 0;
static int SM_COUNT     = 1;
static int mydev;
#ifdef __WITH_MPI
static char host_name[MPI_MAX_PROCESSOR_NAME];
#else
static char host_name[20];
#endif

// Comparison callback for qsort: orders host names lexicographically.
static int stringCmp(void const *a, void const *b)
{
    return strcmp((const char *) a, (const char *) b);
}

// Assigns one GPU to each MPI rank on a node: ranks that share a host name
// are grouped into a node-local communicator, and each rank selects the CUDA
// device whose index matches its node-local rank.
void
assign_gpu_to_local_rank()
{
#ifdef __WITH_MPI
    char (*host_names)[MPI_MAX_PROCESSOR_NAME];
    MPI_Comm nodeComm;
#endif
    int n, namelen, color, rank = 0, nprocs = 1;
    size_t bytes;
    int dev;
    struct cudaDeviceProp deviceProp;

#ifdef __WITH_MPI
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Get_processor_name(host_name, &namelen);
    printf("number of ranks = %d\n", nprocs);

    // Gather every rank's host name, then sort so identical names are adjacent.
    bytes      = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
    host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
    strcpy(host_names[rank], host_name);
    for (n = 0; n < nprocs; n++)
    {
        MPI_Bcast(host_names[n], MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n, MPI_COMM_WORLD);
    }
    qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);

    // Ranks on the same host get the same color, hence the same node-local
    // communicator; myrank becomes the rank's index within its node.
    color = 0;
    for (n = 0; n < nprocs; n++)
    {
        if (n > 0 && strcmp(host_names[n - 1], host_names[n])) color++;
        if (strcmp(host_name, host_names[n]) == 0) break;
    }
    MPI_Comm_split(MPI_COMM_WORLD, color, 0, &nodeComm);
    MPI_Comm_rank(nodeComm, &myrank);
    MPI_Comm_size(nodeComm, &gpu_per_node);
    free(host_names);
#else
    myrank = 0;
#endif

    // Enumerate the CUDA devices visible on this node; every device found is
    // recorded in devloc in enumeration order.
    int deviceCount, slot = 0;
    int *devloc;
    cudaGetDeviceCount(&deviceCount);
    devloc    = (int *) malloc(deviceCount * sizeof(int));
    devloc[0] = 999;
    for (dev = 0; dev < deviceCount; ++dev)
    {
        cudaGetDeviceProperties(&deviceProp, dev);
        devloc[slot] = dev;
        slot++;
    }

    // Abort the whole job if any node has fewer GPUs than node-local ranks.
    int gpu_count_err = 0, global_gpu_count_err = 0;
    if (slot < gpu_per_node)
    {
        if (myrank == 0) printf("!!! ERROR: Not enough GPUs on node %s, %d GPUs found, %d GPUs required !!!\n", host_name, slot, gpu_per_node);
        gpu_count_err = 1;
    }
#ifdef __WITH_MPI
    MPI_Allreduce(&gpu_count_err, &global_gpu_count_err, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    global_gpu_count_err = gpu_count_err;
#endif
    if (global_gpu_count_err > 0)
    {
#ifdef __WITH_MPI
        MPI_Finalize();
#endif
        exit(1);
    }

    printf("rank %d Assigning device %d to process on node %s \n", rank, devloc[myrank], host_name);
    cudaSetDevice(devloc[myrank]);
    mydev = devloc[myrank];
    free(devloc);
}
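
Below is a minimal usage sketch, not taken from the repository: it assumes the header is included from a translation unit built with -D__WITH_GPU and -D__WITH_MPI and linked against the CUDA runtime and an MPI implementation. The only ordering requirement the header imposes is that MPI_Init runs before assign_gpu_to_local_rank(), which in turn calls cudaSetDevice().

// usage_sketch.cpp (illustrative name)
// Build, under the assumptions above, e.g. with:
//   mpicxx -D__WITH_GPU -D__WITH_MPI usage_sketch.cpp -lcudart
#include <mpi.h>
#include "assign_gpu_to_rank.hpp"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    // Each rank binds to the GPU matching its node-local rank; the chosen
    // device index is left in the file-static variable mydev.
    assign_gpu_to_local_rank();

    // ... launch kernels / allocate device memory on the selected GPU ...

    MPI_Finalize();
    return 0;
}

The host-name sort followed by MPI_Comm_split gives every rank a node-local index, so the rank-to-GPU mapping reduces to devloc[myrank]; this relies on the job launcher placing no more ranks per node than there are GPUs, which the collective error check above enforces.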