assign_gpu_to_rank.hpp (R1448 Lenstool-HPC)
#pragma once

// Standard headers for strcmp/strcpy, printf, and malloc/qsort/exit used below.
#include <cstring>
#include <cstdio>
#include <cstdlib>

#ifdef __WITH_GPU
#include <cuda_runtime.h>
#include <cuda.h>
#endif
#ifdef __WITH_MPI
#include <mpi.h>
#endif

static int first_time   = 1;
static int myrank       = 0;   // node-local rank of this process
static int gpu_per_node = 0;   // size of the node-local communicator (ranks per node)
static int SM_COUNT     = 1;
static int mydev;              // CUDA device id assigned to this rank
#ifdef __WITH_MPI
static char host_name[MPI_MAX_PROCESSOR_NAME];
#else
static char host_name[20];
#endif

// Comparator for qsort: host names are stored as fixed-size character arrays,
// so the elements can be compared directly as C strings.
int stringCmp(void const *a, void const *b)
{
    return strcmp((char *) a, (char *) b);
}

// Assigns one CUDA device to each MPI rank on a node: ranks are grouped by
// host name into a node-local communicator, and the node-local rank is used
// as the device index passed to cudaSetDevice().
void assign_gpu_to_local_rank()
{
#ifdef __WITH_MPI
    char (*host_names)[MPI_MAX_PROCESSOR_NAME];
    MPI_Comm nodeComm;
#endif
    int i, n, namelen, color, rank = 0, nprocs = 1;
    size_t bytes;
    int dev, err1;
    struct cudaDeviceProp deviceProp;

#ifdef __WITH_MPI
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Get_processor_name(host_name, &namelen);
    printf("number of ranks = %d\n", nprocs);

    // Gather every rank's host name so ranks on the same node can be grouped.
    bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
    host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
    strcpy(host_names[rank], host_name);
    for (n = 0; n < nprocs; n++)
    {
        MPI_Bcast(&(host_names[n]), MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n, MPI_COMM_WORLD);
    }
    qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);

    // Ranks sharing a host name get the same color and therefore land in the
    // same node-local communicator after the split.
    color = 0;
    for (n = 0; n < nprocs; n++)
    {
        if (n > 0 && strcmp(host_names[n - 1], host_names[n]))
            color++;
        if (strcmp(host_name, host_names[n]) == 0)
            break;
    }
    MPI_Comm_split(MPI_COMM_WORLD, color, 0, &nodeComm);
    MPI_Comm_rank(nodeComm, &myrank);
    MPI_Comm_size(nodeComm, &gpu_per_node);
#else
    myrank = 0;
#endif

    // Enumerate the CUDA devices visible on this node.
    int deviceCount, slot = 0;
    int *devloc;
    cudaGetDeviceCount(&deviceCount);
    devloc = (int *) malloc(deviceCount * sizeof(int));
    devloc[0] = 999;   // placeholder, overwritten below when devices are found
    for (dev = 0; dev < deviceCount; ++dev)
    {
        cudaGetDeviceProperties(&deviceProp, dev);
        devloc[slot] = dev;
        slot++;
    }

    // Abort the whole job if any node has fewer GPUs than ranks placed on it.
    int gpu_count_err = 0, global_gpu_count_err = 0;
    if (slot < gpu_per_node)
    {
        if (myrank == 0)
            printf("!!! ERROR: Not enough GPUs on node %s, %d GPUs found, %d GPUs required !!!\n",
                   host_name, slot, gpu_per_node);
        gpu_count_err = 1;
    }
#ifdef __WITH_MPI
    MPI_Allreduce(&gpu_count_err, &global_gpu_count_err, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
#else
    global_gpu_count_err = gpu_count_err;
#endif
    if (global_gpu_count_err > 0)
    {
#ifdef __WITH_MPI
        MPI_Finalize();
#endif
        exit(1);
    }

    printf("rank %d Assigning device %d to process on node %s\n", rank, devloc[myrank], host_name);
    cudaSetDevice(devloc[myrank]);
    mydev = devloc[myrank];
}
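
Usage note: a minimal sketch of how a caller might use this helper in a build with both __WITH_MPI and __WITH_GPU defined. The main.cpp below and its file name are illustrative assumptions, not code from Lenstool-HPC.

// Hypothetical caller (not part of the repository): each rank initializes MPI,
// lets assign_gpu_to_local_rank() pick a node-local CUDA device, then proceeds.
#include <mpi.h>
#include <cstdio>
#include "assign_gpu_to_rank.hpp"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    // Groups MPI_COMM_WORLD by host name and calls cudaSetDevice() with the
    // node-local rank, so ranks sharing a node get distinct GPUs.
    assign_gpu_to_local_rank();

    int world_rank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    printf("rank %d is using CUDA device %d\n", world_rank, mydev);

    MPI_Finalize();
    return 0;
}

On MPI-3 and later, the same node-local grouping can also be obtained directly with MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &nodeComm); the host-name sort used in the header works with older MPI versions as well.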