Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102273337
gb_gpu_extra.h
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Feb 18, 23:56
Size
8 KB
Mime Type
text/x-c
Expires
Thu, Feb 20, 23:56 (2 d)
Engine
blob
Format
Raw Data
Handle
24321578
Attached To
rLAMMPS lammps
gb_gpu_extra.h
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#ifndef GB_GPU_EXTRA_H
#define GB_GPU_EXTRA_H
enum
{
SPHERE_SPHERE
,
SPHERE_ELLIPSE
,
ELLIPSE_SPHERE
,
ELLIPSE_ELLIPSE
};
#ifdef _DOUBLE_DOUBLE
#define numtyp double
#define numtyp2 double2
#define numtyp4 double4
#define acctyp double
#define acctyp4 double4
#endif
#ifdef _SINGLE_DOUBLE
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp double
#define acctyp4 double4
#endif
#ifndef numtyp
#define numtyp float
#define numtyp2 float2
#define numtyp4 float4
#define acctyp float
#define acctyp4 float4
#endif
#ifdef NV_KERNEL
#include "nv_kernel_def.h"
#else
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#define GLOBAL_ID_X get_global_id(0)
#define THREAD_ID_X get_local_id(0)
#define BLOCK_ID_X get_group_id(0)
#define BLOCK_SIZE_X get_local_size(0)
#define __syncthreads() barrier(CLK_LOCAL_MEM_FENCE)
#define __inline inline
#define BLOCK_PAIR 64
#define MAX_SHARED_TYPES 8
#endif
/* ----------------------------------------------------------------------
dot product of 2 vectors
------------------------------------------------------------------------- */
__inline
numtyp
gpu_dot3
(
const
numtyp
*
v1
,
const
numtyp
*
v2
)
{
return
v1
[
0
]
*
v2
[
0
]
+
v1
[
1
]
*
v2
[
1
]
+
v1
[
2
]
*
v2
[
2
];
}
/* ----------------------------------------------------------------------
cross product of 2 vectors
------------------------------------------------------------------------- */
__inline
void
gpu_cross3
(
const
numtyp
*
v1
,
const
numtyp
*
v2
,
numtyp
*
ans
)
{
ans
[
0
]
=
v1
[
1
]
*
v2
[
2
]
-
v1
[
2
]
*
v2
[
1
];
ans
[
1
]
=
v1
[
2
]
*
v2
[
0
]
-
v1
[
0
]
*
v2
[
2
];
ans
[
2
]
=
v1
[
0
]
*
v2
[
1
]
-
v1
[
1
]
*
v2
[
0
];
}
/* ----------------------------------------------------------------------
determinant of a matrix
------------------------------------------------------------------------- */
__inline
numtyp
gpu_det3
(
const
numtyp
m
[
9
])
{
numtyp
ans
=
m
[
0
]
*
m
[
4
]
*
m
[
8
]
-
m
[
0
]
*
m
[
5
]
*
m
[
7
]
-
m
[
3
]
*
m
[
1
]
*
m
[
8
]
+
m
[
3
]
*
m
[
2
]
*
m
[
7
]
+
m
[
6
]
*
m
[
1
]
*
m
[
5
]
-
m
[
6
]
*
m
[
2
]
*
m
[
4
];
return
ans
;
}
/* ----------------------------------------------------------------------
diagonal matrix times a full matrix
------------------------------------------------------------------------- */
__inline
void
gpu_times3
(
const
numtyp4
shape
,
const
numtyp
m
[
9
],
numtyp
ans
[
9
])
{
ans
[
0
]
=
shape
.
x
*
m
[
0
];
ans
[
1
]
=
shape
.
x
*
m
[
1
];
ans
[
2
]
=
shape
.
x
*
m
[
2
];
ans
[
3
]
=
shape
.
y
*
m
[
3
];
ans
[
4
]
=
shape
.
y
*
m
[
4
];
ans
[
5
]
=
shape
.
y
*
m
[
5
];
ans
[
6
]
=
shape
.
z
*
m
[
6
];
ans
[
7
]
=
shape
.
z
*
m
[
7
];
ans
[
8
]
=
shape
.
z
*
m
[
8
];
}
/* ----------------------------------------------------------------------
add two matrices
------------------------------------------------------------------------- */
__inline
void
gpu_plus3
(
const
numtyp
m
[
9
],
const
numtyp
m2
[
9
],
numtyp
ans
[
9
])
{
ans
[
0
]
=
m
[
0
]
+
m2
[
0
];
ans
[
1
]
=
m
[
1
]
+
m2
[
1
];
ans
[
2
]
=
m
[
2
]
+
m2
[
2
];
ans
[
3
]
=
m
[
3
]
+
m2
[
3
];
ans
[
4
]
=
m
[
4
]
+
m2
[
4
];
ans
[
5
]
=
m
[
5
]
+
m2
[
5
];
ans
[
6
]
=
m
[
6
]
+
m2
[
6
];
ans
[
7
]
=
m
[
7
]
+
m2
[
7
];
ans
[
8
]
=
m
[
8
]
+
m2
[
8
];
}
/* ----------------------------------------------------------------------
multiply the transpose of mat1 times mat2
------------------------------------------------------------------------- */
__inline
void
gpu_transpose_times3
(
const
numtyp
m
[
9
],
const
numtyp
m2
[
9
],
numtyp
ans
[
9
])
{
ans
[
0
]
=
m
[
0
]
*
m2
[
0
]
+
m
[
3
]
*
m2
[
3
]
+
m
[
6
]
*
m2
[
6
];
ans
[
1
]
=
m
[
0
]
*
m2
[
1
]
+
m
[
3
]
*
m2
[
4
]
+
m
[
6
]
*
m2
[
7
];
ans
[
2
]
=
m
[
0
]
*
m2
[
2
]
+
m
[
3
]
*
m2
[
5
]
+
m
[
6
]
*
m2
[
8
];
ans
[
3
]
=
m
[
1
]
*
m2
[
0
]
+
m
[
4
]
*
m2
[
3
]
+
m
[
7
]
*
m2
[
6
];
ans
[
4
]
=
m
[
1
]
*
m2
[
1
]
+
m
[
4
]
*
m2
[
4
]
+
m
[
7
]
*
m2
[
7
];
ans
[
5
]
=
m
[
1
]
*
m2
[
2
]
+
m
[
4
]
*
m2
[
5
]
+
m
[
7
]
*
m2
[
8
];
ans
[
6
]
=
m
[
2
]
*
m2
[
0
]
+
m
[
5
]
*
m2
[
3
]
+
m
[
8
]
*
m2
[
6
];
ans
[
7
]
=
m
[
2
]
*
m2
[
1
]
+
m
[
5
]
*
m2
[
4
]
+
m
[
8
]
*
m2
[
7
];
ans
[
8
]
=
m
[
2
]
*
m2
[
2
]
+
m
[
5
]
*
m2
[
5
]
+
m
[
8
]
*
m2
[
8
];
}
/* ----------------------------------------------------------------------
row vector times matrix
------------------------------------------------------------------------- */
__inline
void
gpu_row_times3
(
const
numtyp
*
v
,
const
numtyp
m
[
9
],
numtyp
*
ans
)
{
ans
[
0
]
=
m
[
0
]
*
v
[
0
]
+
v
[
1
]
*
m
[
3
]
+
v
[
2
]
*
m
[
6
];
ans
[
1
]
=
v
[
0
]
*
m
[
1
]
+
m
[
4
]
*
v
[
1
]
+
v
[
2
]
*
m
[
7
];
ans
[
2
]
=
v
[
0
]
*
m
[
2
]
+
v
[
1
]
*
m
[
5
]
+
m
[
8
]
*
v
[
2
];
}
/* ----------------------------------------------------------------------
solve Ax = b or M ans = v
use gaussian elimination & partial pivoting on matrix
error_flag set to 2 if bad matrix inversion attempted
------------------------------------------------------------------------- */
__inline
void
gpu_mldivide3
(
const
numtyp
m
[
9
],
const
numtyp
*
v
,
numtyp
*
ans
,
__global
int
*
error_flag
)
{
// create augmented matrix for pivoting
numtyp
aug
[
12
],
t
;
aug
[
3
]
=
v
[
0
];
aug
[
0
]
=
m
[
0
];
aug
[
1
]
=
m
[
1
];
aug
[
2
]
=
m
[
2
];
aug
[
7
]
=
v
[
1
];
aug
[
4
]
=
m
[
3
];
aug
[
5
]
=
m
[
4
];
aug
[
6
]
=
m
[
5
];
aug
[
11
]
=
v
[
2
];
aug
[
8
]
=
m
[
6
];
aug
[
9
]
=
m
[
7
];
aug
[
10
]
=
m
[
8
];
if
(
fabs
(
aug
[
4
])
>
fabs
(
aug
[
0
]))
{
numtyp
swapt
;
swapt
=
aug
[
0
];
aug
[
0
]
=
aug
[
4
];
aug
[
4
]
=
swapt
;
swapt
=
aug
[
1
];
aug
[
1
]
=
aug
[
5
];
aug
[
5
]
=
swapt
;
swapt
=
aug
[
2
];
aug
[
2
]
=
aug
[
6
];
aug
[
6
]
=
swapt
;
swapt
=
aug
[
3
];
aug
[
3
]
=
aug
[
7
];
aug
[
7
]
=
swapt
;
}
if
(
fabs
(
aug
[
8
])
>
fabs
(
aug
[
0
]))
{
numtyp
swapt
;
swapt
=
aug
[
0
];
aug
[
0
]
=
aug
[
8
];
aug
[
8
]
=
swapt
;
swapt
=
aug
[
1
];
aug
[
1
]
=
aug
[
9
];
aug
[
9
]
=
swapt
;
swapt
=
aug
[
2
];
aug
[
2
]
=
aug
[
10
];
aug
[
10
]
=
swapt
;
swapt
=
aug
[
3
];
aug
[
3
]
=
aug
[
11
];
aug
[
11
]
=
swapt
;
}
if
(
aug
[
0
]
!=
(
numtyp
)
0.0
)
{
if
(
0
!=
0
)
{
numtyp
swapt
;
swapt
=
aug
[
0
];
aug
[
0
]
=
aug
[
0
];
aug
[
0
]
=
swapt
;
swapt
=
aug
[
1
];
aug
[
1
]
=
aug
[
1
];
aug
[
1
]
=
swapt
;
swapt
=
aug
[
2
];
aug
[
2
]
=
aug
[
2
];
aug
[
2
]
=
swapt
;
swapt
=
aug
[
3
];
aug
[
3
]
=
aug
[
3
];
aug
[
3
]
=
swapt
;
}
}
else
if
(
aug
[
4
]
!=
(
numtyp
)
0.0
)
{
if
(
1
!=
0
)
{
numtyp
swapt
;
swapt
=
aug
[
0
];
aug
[
0
]
=
aug
[
4
];
aug
[
4
]
=
swapt
;
swapt
=
aug
[
1
];
aug
[
1
]
=
aug
[
5
];
aug
[
5
]
=
swapt
;
swapt
=
aug
[
2
];
aug
[
2
]
=
aug
[
6
];
aug
[
6
]
=
swapt
;
swapt
=
aug
[
3
];
aug
[
3
]
=
aug
[
7
];
aug
[
7
]
=
swapt
;
}
}
else
if
(
aug
[
8
]
!=
(
numtyp
)
0.0
)
{
if
(
2
!=
0
)
{
numtyp
swapt
;
swapt
=
aug
[
0
];
aug
[
0
]
=
aug
[
8
];
aug
[
8
]
=
swapt
;
swapt
=
aug
[
1
];
aug
[
1
]
=
aug
[
9
];
aug
[
9
]
=
swapt
;
swapt
=
aug
[
2
];
aug
[
2
]
=
aug
[
10
];
aug
[
10
]
=
swapt
;
swapt
=
aug
[
3
];
aug
[
3
]
=
aug
[
11
];
aug
[
11
]
=
swapt
;
}
}
else
*
error_flag
=
2
;
t
=
aug
[
4
]
/
aug
[
0
];
aug
[
5
]
-=
t
*
aug
[
1
];
aug
[
6
]
-=
t
*
aug
[
2
];
aug
[
7
]
-=
t
*
aug
[
3
];
t
=
aug
[
8
]
/
aug
[
0
];
aug
[
9
]
-=
t
*
aug
[
1
];
aug
[
10
]
-=
t
*
aug
[
2
];
aug
[
11
]
-=
t
*
aug
[
3
];
if
(
fabs
(
aug
[
9
])
>
fabs
(
aug
[
5
]))
{
numtyp
swapt
;
swapt
=
aug
[
4
];
aug
[
4
]
=
aug
[
8
];
aug
[
8
]
=
swapt
;
swapt
=
aug
[
5
];
aug
[
5
]
=
aug
[
9
];
aug
[
9
]
=
swapt
;
swapt
=
aug
[
6
];
aug
[
6
]
=
aug
[
10
];
aug
[
10
]
=
swapt
;
swapt
=
aug
[
7
];
aug
[
7
]
=
aug
[
11
];
aug
[
11
]
=
swapt
;
}
if
(
aug
[
5
]
!=
(
numtyp
)
0.0
)
{
if
(
1
!=
1
)
{
numtyp
swapt
;
swapt
=
aug
[
4
];
aug
[
4
]
=
aug
[
4
];
aug
[
4
]
=
swapt
;
swapt
=
aug
[
5
];
aug
[
5
]
=
aug
[
5
];
aug
[
5
]
=
swapt
;
swapt
=
aug
[
6
];
aug
[
6
]
=
aug
[
6
];
aug
[
6
]
=
swapt
;
swapt
=
aug
[
7
];
aug
[
7
]
=
aug
[
7
];
aug
[
7
]
=
swapt
;
}
}
else
if
(
aug
[
9
]
!=
(
numtyp
)
0.0
)
{
if
(
2
!=
1
)
{
numtyp
swapt
;
swapt
=
aug
[
4
];
aug
[
4
]
=
aug
[
8
];
aug
[
8
]
=
swapt
;
swapt
=
aug
[
5
];
aug
[
5
]
=
aug
[
9
];
aug
[
9
]
=
swapt
;
swapt
=
aug
[
6
];
aug
[
6
]
=
aug
[
10
];
aug
[
10
]
=
swapt
;
swapt
=
aug
[
7
];
aug
[
7
]
=
aug
[
11
];
aug
[
11
]
=
swapt
;
}
}
t
=
aug
[
9
]
/
aug
[
5
];
aug
[
10
]
-=
t
*
aug
[
6
];
aug
[
11
]
-=
t
*
aug
[
7
];
if
(
aug
[
10
]
==
(
numtyp
)
0.0
)
*
error_flag
=
2
;
ans
[
2
]
=
aug
[
11
]
/
aug
[
10
];
t
=
(
numtyp
)
0.0
;
t
+=
aug
[
6
]
*
ans
[
2
];
ans
[
1
]
=
(
aug
[
7
]
-
t
)
/
aug
[
5
];
t
=
(
numtyp
)
0.0
;
t
+=
aug
[
1
]
*
ans
[
1
];
t
+=
aug
[
2
]
*
ans
[
2
];
ans
[
0
]
=
(
aug
[
3
]
-
t
)
/
aug
[
0
];
}
/* ----------------------------------------------------------------------
compute rotation matrix from quaternion conjugate
quat = [w i j k]
------------------------------------------------------------------------- */
__inline
void
gpu_quat_to_mat_trans
(
__global
const
numtyp4
*
qif
,
const
int
qi
,
numtyp
mat
[
9
])
{
numtyp4
q
=
qif
[
qi
];
numtyp
w2
=
q
.
x
*
q
.
x
;
numtyp
i2
=
q
.
y
*
q
.
y
;
numtyp
j2
=
q
.
z
*
q
.
z
;
numtyp
k2
=
q
.
w
*
q
.
w
;
numtyp
twoij
=
(
numtyp
)
2.0
*
q
.
y
*
q
.
z
;
numtyp
twoik
=
(
numtyp
)
2.0
*
q
.
y
*
q
.
w
;
numtyp
twojk
=
(
numtyp
)
2.0
*
q
.
z
*
q
.
w
;
numtyp
twoiw
=
(
numtyp
)
2.0
*
q
.
y
*
q
.
x
;
numtyp
twojw
=
(
numtyp
)
2.0
*
q
.
z
*
q
.
x
;
numtyp
twokw
=
(
numtyp
)
2.0
*
q
.
w
*
q
.
x
;
mat
[
0
]
=
w2
+
i2
-
j2
-
k2
;
mat
[
3
]
=
twoij
-
twokw
;
mat
[
6
]
=
twojw
+
twoik
;
mat
[
1
]
=
twoij
+
twokw
;
mat
[
4
]
=
w2
-
i2
+
j2
-
k2
;
mat
[
7
]
=
twojk
-
twoiw
;
mat
[
2
]
=
twoik
-
twojw
;
mat
[
5
]
=
twojk
+
twoiw
;
mat
[
8
]
=
w2
-
i2
-
j2
+
k2
;
}
#endif
Event Timeline
Log In to Comment