gb_gpu_extra.h
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Tue, Feb 18, 23:56

gb_gpu_extra.h
View Options

	/* ----------------------------------------------------------------------
	LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
	http://lammps.sandia.gov, Sandia National Laboratories
	Steve Plimpton, sjplimp@sandia.gov

	Copyright (2003) Sandia Corporation. Under the terms of Contract
	DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
	certain rights in this software. This software is distributed under
	the GNU General Public License.

	See the README file in the top-level LAMMPS directory.
	------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------
	Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
	------------------------------------------------------------------------- */

	#ifndef GB_GPU_EXTRA_H
	#define GB_GPU_EXTRA_H

	enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};

	#ifdef _DOUBLE_DOUBLE
	#define numtyp double
	#define numtyp2 double2
	#define numtyp4 double4
	#define acctyp double
	#define acctyp4 double4
	#endif

	#ifdef _SINGLE_DOUBLE
	#define numtyp float
	#define numtyp2 float2
	#define numtyp4 float4
	#define acctyp double
	#define acctyp4 double4
	#endif

	#ifndef numtyp
	#define numtyp float
	#define numtyp2 float2
	#define numtyp4 float4
	#define acctyp float
	#define acctyp4 float4
	#endif

	#ifdef NV_KERNEL

	#include "nv_kernel_def.h"

	#else

	#pragma OPENCL EXTENSION cl_khr_fp64: enable
	#define GLOBAL_ID_X get_global_id(0)
	#define THREAD_ID_X get_local_id(0)
	#define BLOCK_ID_X get_group_id(0)
	#define BLOCK_SIZE_X get_local_size(0)
	#define __syncthreads() barrier(CLK_LOCAL_MEM_FENCE)
	#define __inline inline
	#define BLOCK_PAIR 64
	#define MAX_SHARED_TYPES 8

	#endif

	/* ----------------------------------------------------------------------
	dot product of 2 vectors
	------------------------------------------------------------------------- */

	__inline numtyp gpu_dot3(const numtyp v1, const numtyp v2)
	{
	return v1[0]v2[0]+v1[1]v2[1]+v1[2]*v2[2];
	}

	/* ----------------------------------------------------------------------
	cross product of 2 vectors
	------------------------------------------------------------------------- */

	__inline void gpu_cross3(const numtyp v1, const numtyp v2, numtyp *ans)
	{
	ans[0] = v1[1]v2[2]-v1[2]v2[1];
	ans[1] = v1[2]v2[0]-v1[0]v2[2];
	ans[2] = v1[0]v2[1]-v1[1]v2[0];
	}

	/* ----------------------------------------------------------------------
	determinant of a matrix
	------------------------------------------------------------------------- */

	__inline numtyp gpu_det3(const numtyp m[9])
	{
	numtyp ans = m[0]m[4]m[8] - m[0]m[5]m[7] -
	m[3]m[1]m[8] + m[3]m[2]m[7] +
	m[6]m[1]m[5] - m[6]m[2]m[4];
	return ans;
	}

	/* ----------------------------------------------------------------------
	diagonal matrix times a full matrix
	------------------------------------------------------------------------- */

	__inline void gpu_times3(const numtyp4 shape, const numtyp m[9],
	numtyp ans[9])
	{
	ans[0] = shape.x*m[0];
	ans[1] = shape.x*m[1];
	ans[2] = shape.x*m[2];
	ans[3] = shape.y*m[3];
	ans[4] = shape.y*m[4];
	ans[5] = shape.y*m[5];
	ans[6] = shape.z*m[6];
	ans[7] = shape.z*m[7];
	ans[8] = shape.z*m[8];
	}

	/* ----------------------------------------------------------------------
	add two matrices
	------------------------------------------------------------------------- */

	__inline void gpu_plus3(const numtyp m[9], const numtyp m2[9], numtyp ans[9])
	{
	ans[0] = m[0]+m2[0];
	ans[1] = m[1]+m2[1];
	ans[2] = m[2]+m2[2];
	ans[3] = m[3]+m2[3];
	ans[4] = m[4]+m2[4];
	ans[5] = m[5]+m2[5];
	ans[6] = m[6]+m2[6];
	ans[7] = m[7]+m2[7];
	ans[8] = m[8]+m2[8];
	}

	/* ----------------------------------------------------------------------
	multiply the transpose of mat1 times mat2
	------------------------------------------------------------------------- */

	__inline void gpu_transpose_times3(const numtyp m[9], const numtyp m2[9],
	numtyp ans[9])
	{
	ans[0] = m[0]m2[0]+m[3]m2[3]+m[6]*m2[6];
	ans[1] = m[0]m2[1]+m[3]m2[4]+m[6]*m2[7];
	ans[2] = m[0]m2[2]+m[3]m2[5]+m[6]*m2[8];
	ans[3] = m[1]m2[0]+m[4]m2[3]+m[7]*m2[6];
	ans[4] = m[1]m2[1]+m[4]m2[4]+m[7]*m2[7];
	ans[5] = m[1]m2[2]+m[4]m2[5]+m[7]*m2[8];
	ans[6] = m[2]m2[0]+m[5]m2[3]+m[8]*m2[6];
	ans[7] = m[2]m2[1]+m[5]m2[4]+m[8]*m2[7];
	ans[8] = m[2]m2[2]+m[5]m2[5]+m[8]*m2[8];
	}

	/* ----------------------------------------------------------------------
	row vector times matrix
	------------------------------------------------------------------------- */

	__inline void gpu_row_times3(const numtyp v, const numtyp m[9], numtyp ans)
	{
	ans[0] = m[0]v[0]+v[1]m[3]+v[2]*m[6];
	ans[1] = v[0]m[1]+m[4]v[1]+v[2]*m[7];
	ans[2] = v[0]m[2]+v[1]m[5]+m[8]*v[2];
	}

	/* ----------------------------------------------------------------------
	solve Ax = b or M ans = v
	use gaussian elimination & partial pivoting on matrix
	error_flag set to 2 if bad matrix inversion attempted
	------------------------------------------------------------------------- */

	__inline void gpu_mldivide3(const numtyp m[9], const numtyp v, numtyp ans,
	__global int *error_flag)
	{
	// create augmented matrix for pivoting

	numtyp aug[12], t;

	aug[3] = v[0];
	aug[0] = m[0];
	aug[1] = m[1];
	aug[2] = m[2];
	aug[7] = v[1];
	aug[4] = m[3];
	aug[5] = m[4];
	aug[6] = m[5];
	aug[11] = v[2];
	aug[8] = m[6];
	aug[9] = m[7];
	aug[10] = m[8];

	if (fabs(aug[4]) > fabs(aug[0])) {
	numtyp swapt;
	swapt=aug[0]; aug[0]=aug[4]; aug[4]=swapt;
	swapt=aug[1]; aug[1]=aug[5]; aug[5]=swapt;
	swapt=aug[2]; aug[2]=aug[6]; aug[6]=swapt;
	swapt=aug[3]; aug[3]=aug[7]; aug[7]=swapt;
	}
	if (fabs(aug[8]) > fabs(aug[0])) {
	numtyp swapt;
	swapt=aug[0]; aug[0]=aug[8]; aug[8]=swapt;
	swapt=aug[1]; aug[1]=aug[9]; aug[9]=swapt;
	swapt=aug[2]; aug[2]=aug[10]; aug[10]=swapt;
	swapt=aug[3]; aug[3]=aug[11]; aug[11]=swapt;
	}

	if (aug[0] != (numtyp)0.0) {
	if (0!=0) {
	numtyp swapt;
	swapt=aug[0]; aug[0]=aug[0]; aug[0]=swapt;
	swapt=aug[1]; aug[1]=aug[1]; aug[1]=swapt;
	swapt=aug[2]; aug[2]=aug[2]; aug[2]=swapt;
	swapt=aug[3]; aug[3]=aug[3]; aug[3]=swapt;
	}
	} else if (aug[4] != (numtyp)0.0) {
	if (1!=0) {
	numtyp swapt;
	swapt=aug[0]; aug[0]=aug[4]; aug[4]=swapt;
	swapt=aug[1]; aug[1]=aug[5]; aug[5]=swapt;
	swapt=aug[2]; aug[2]=aug[6]; aug[6]=swapt;
	swapt=aug[3]; aug[3]=aug[7]; aug[7]=swapt;
	}
	} else if (aug[8] != (numtyp)0.0) {
	if (2!=0) {
	numtyp swapt;
	swapt=aug[0]; aug[0]=aug[8]; aug[8]=swapt;
	swapt=aug[1]; aug[1]=aug[9]; aug[9]=swapt;
	swapt=aug[2]; aug[2]=aug[10]; aug[10]=swapt;
	swapt=aug[3]; aug[3]=aug[11]; aug[11]=swapt;
	}
	} else
	*error_flag=2;

	t = aug[4]/aug[0];
	aug[5]-=t*aug[1];
	aug[6]-=t*aug[2];
	aug[7]-=t*aug[3];
	t = aug[8]/aug[0];
	aug[9]-=t*aug[1];
	aug[10]-=t*aug[2];
	aug[11]-=t*aug[3];

	if (fabs(aug[9]) > fabs(aug[5])) {
	numtyp swapt;
	swapt=aug[4]; aug[4]=aug[8]; aug[8]=swapt;
	swapt=aug[5]; aug[5]=aug[9]; aug[9]=swapt;
	swapt=aug[6]; aug[6]=aug[10]; aug[10]=swapt;
	swapt=aug[7]; aug[7]=aug[11]; aug[11]=swapt;
	}

	if (aug[5] != (numtyp)0.0) {
	if (1!=1) {
	numtyp swapt;
	swapt=aug[4]; aug[4]=aug[4]; aug[4]=swapt;
	swapt=aug[5]; aug[5]=aug[5]; aug[5]=swapt;
	swapt=aug[6]; aug[6]=aug[6]; aug[6]=swapt;
	swapt=aug[7]; aug[7]=aug[7]; aug[7]=swapt;
	}
	} else if (aug[9] != (numtyp)0.0) {
	if (2!=1) {
	numtyp swapt;
	swapt=aug[4]; aug[4]=aug[8]; aug[8]=swapt;
	swapt=aug[5]; aug[5]=aug[9]; aug[9]=swapt;
	swapt=aug[6]; aug[6]=aug[10]; aug[10]=swapt;
	swapt=aug[7]; aug[7]=aug[11]; aug[11]=swapt;
	}
	}

	t = aug[9]/aug[5];
	aug[10]-=t*aug[6];
	aug[11]-=t*aug[7];

	if (aug[10] == (numtyp)0.0)
	*error_flag=2;

	ans[2] = aug[11]/aug[10];
	t = (numtyp)0.0;
	t += aug[6]*ans[2];
	ans[1] = (aug[7]-t) / aug[5];
	t = (numtyp)0.0;
	t += aug[1]*ans[1];
	t += aug[2]*ans[2];
	ans[0] = (aug[3]-t) / aug[0];
	}

	/* ----------------------------------------------------------------------
	compute rotation matrix from quaternion conjugate
	quat = [w i j k]
	------------------------------------------------------------------------- */

	__inline void gpu_quat_to_mat_trans(__global const numtyp4 *qif, const int qi,
	numtyp mat[9])
	{
	numtyp4 q=qif[qi];

	numtyp w2 = q.x*q.x;
	numtyp i2 = q.y*q.y;
	numtyp j2 = q.z*q.z;
	numtyp k2 = q.w*q.w;
	numtyp twoij = (numtyp)2.0q.yq.z;
	numtyp twoik = (numtyp)2.0q.yq.w;
	numtyp twojk = (numtyp)2.0q.zq.w;
	numtyp twoiw = (numtyp)2.0q.yq.x;
	numtyp twojw = (numtyp)2.0q.zq.x;
	numtyp twokw = (numtyp)2.0q.wq.x;

	mat[0] = w2+i2-j2-k2;
	mat[3] = twoij-twokw;
	mat[6] = twojw+twoik;

	mat[1] = twoij+twokw;
	mat[4] = w2-i2+j2-k2;
	mat[7] = twojk-twoiw;

	mat[2] = twoik-twojw;
	mat[5] = twojk+twoiw;
	mat[8] = w2-i2-j2+k2;
	}

	#endif

gb_gpu_extra.hNo OneTemporaryActions

File Metadata

gb_gpu_extra.hView Options

Event Timeline

gb_gpu_extra.h
No OneTemporary
Actions

gb_gpu_extra.h
View Options