File Metadata

Created: Wed, Jul 2, 09:27

mgpt_linalg.cpp
View Options

	/* ----------------------------------------------------------------------
	LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
	http://lammps.sandia.gov, Sandia National Laboratories
	Steve Plimpton, sjplimp@sandia.gov

	Copyright (2003) Sandia Corporation. Under the terms of Contract
	DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
	certain rights in this software. This software is distributed under
	the GNU General Public License.

	See the README file in the top-level LAMMPS directory.
	------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------
	This file is part of the MGPT implementation. See further comments
	in pair_mgpt.cpp and pair_mgpt.h.
	------------------------------------------------------------------------- */

	#include "mgpt_linalg.h"

	#include <cstdio>
	#include <cstdlib>
	#include <cmath>
	#include <cassert>

	#define restrict __restrict__

	#ifdef IBM_BG_SIMD
	#include <builtins.h>

	/* Double precision 440d (double hummer) matrix multiplication */
	#define const
	#include "mgpt_mmul_bg_552.c.h"
	#include "mgpt_mmul_bg_722.c.h"
	#include "mgpt_bgmul_7.c.h"

	/* Double precision 440d (double hummer) product trace */
	#define real double
	#include "mgpt_ttr_5123.c.h"
	#include "mgpt_ttr_7123.c.h"
	#undef real
	#undef const
	#endif


	#ifdef IBM_BGQ_SIMD
	/* Double precision QPX matrix multiplication */
	#include "mgpt_mmul_bgq_n5_lda8_2x8.c.h"
	#include "mgpt_mmul_bgq_n7_lda8_4x8.c.h"

	/* Double precision QPX product trace */
	#include "mgpt_ttr_5141.c.h"
	#include "mgpt_ttr_7141.c.h"
	#endif


	#ifdef x86_SIMD
	/* Double precision SSE2 matrix multiplication */
	#include "mgpt_mmul3d_526.c.h"
	#include "mgpt_mmul3d_744.c.h"

	/* Single precision SSE2 matrix multiplication */
	#include "mgpt_mmul3_538.c.h"
	#include "mgpt_mmul3_748.c.h"

	/* Double precision SSE3 product trace */
	#define real double
	#include "mgpt_ttr_5022.c.h"
	#include "mgpt_ttr_7022.c.h"
	#undef real

	/* Single precision SSE3 product trace */
	#define real float
	#include "mgpt_ttr_5042.c.h"
	#include "mgpt_ttr_7042.c.h"
	#undef real

	#endif

	#if defined(IBM_BG_SIMD) \|\| defined(IBM_BGQ_SIMD)
	#define const
	#endif
	static void transprod_generic(const double * restrict A,
	const double * restrict B,
	double * restrict C) {
	const int lda = 8,n = mgpt_linalg::matrix_size;
	int i,j,k;
	double s;
	for(i = 0; i<n; i++)
	for(j = 0; j<n; j++) {
	s = 0.0;
	for(k = 1; k<=n; k++)
	s = s + A[ilda+k]B[j*lda+k];
	C[i*lda+(j+1)] = s;
	}
	}

	static void transtrace3_generic(const double * restrict A,
	const double * restrict B0,double * restrict tout0,
	const double * restrict B1,double * restrict tout1,
	const double * restrict B2,double * restrict tout2) {
	const int lda = 8,n = mgpt_linalg::matrix_size;
	double t0 = 0.0,t1 = 0.0,t2 = 0.0;
	int i,j;

	for(i = 0; i<n; i++)
	for(j = 1; j<=n; j++) {
	int idx = i*lda + j;
	double atmp = A[idx];
	t0 = t0 + atmp*B0[idx];
	t1 = t1 + atmp*B1[idx];
	t2 = t2 + atmp*B2[idx];
	}
	*tout0 = t0;
	*tout1 = t1;
	*tout2 = t2;
	}

	static void transprod_error(const double * restrict A,
	const double * restrict B,
	double * restrict C) {
	printf("Linear algebra subroutines not initialized (transprod).\n");
	exit(1);
	}
	static void transtrace3_error(const double * restrict A,
	const double * restrict B0,double * restrict tout0,
	const double * restrict B1,double * restrict tout1,
	const double * restrict B2,double * restrict tout2) {
	printf("Linear algebra subroutines not initialized (transtrace3).\n");
	exit(1);
	}


	#if defined(IBM_BG_SIMD) \|\| defined(IBM_BGQ_SIMD)
	#undef const
	#endif

	#undef restrict

	int mgpt_linalg::matrix_size;

	mgpt_linalg::mgpt_linalg() {
	mgpt_linalg::matrix_size = 0;

	tr_mul = transprod_error;
	tr_trace = transtrace3_error;
	single = 0;
	}

	mgpt_linalg::mgpt_linalg(int n,int single_precision) {

	mgpt_linalg::matrix_size = n;

	tr_mul = transprod_generic;
	tr_trace = transtrace3_generic;
	single = 0;
	msg = "@@@ Choosing generic (unoptimized) linear algebra routines.\n";

	#ifdef IBM_BG_SIMD
	msg = "@@@ Choosing BG/L optimized linear algebra routines.\n";
	if(n == 5) {
	tr_mul = mmul_bg_5_8_5x2v2;
	tr_trace = ttr_bg_5_8_3_v2r3;
	} else if(n == 7) {
	//tr_mul = mmul_bg_7_8_2x2v2;
	tr_mul = (trmul_fun) bgmul_7;
	tr_trace = ttr_bg_7_8_3_v2r3;
	}
	#elif defined(IBM_BGQ_SIMD)
	msg = "@@@ Choosing BG/Q optimized linear algebra routines.\n";
	if(1) {
	if(n == 5) {
	tr_mul = mmul_bgq_n5_lda8_2x8;
	tr_trace = ttr_bg_5_8_3_v4r1;
	} else if(n == 7) {
	tr_mul = mmul_bgq_n7_lda8_4x8;
	tr_trace = ttr_bg_7_8_3_v4r1;
	}
	}
	#elif defined(x86_SIMD)
	if(single_precision) {
	msg = "@@@ Choosing Intel/AMD single precision linear algebra routines.\n";
	if(n == 5) {
	tr_mul = (trmul_fun) mmul3_5_8_3x8v4;
	tr_trace = (trtrace3_fun) ttr_5_8_3_v4r2;
	single = 1;
	} else if(n == 7) {
	tr_mul = (trmul_fun) mmul3_7_8_4x8v4;
	tr_trace = (trtrace3_fun) ttr_7_8_3_v4r2;
	single = 1;
	}
	} else {
	msg = "@@@ Choosing Intel/AMD double precision linear algebra routines.\n";
	if(n == 5) {
	tr_mul = mmul3_5_8_2x6v2;
	tr_trace = ttr_5_8_3_v2r2;
	} else if(n == 7) {
	tr_mul = mmul3_7_8_4x4v2;
	tr_trace = ttr_7_8_3_v2r2;
	}
	}
	#endif
	}

mgpt_linalg.cpp
No OneTemporary
Actions

File Metadata

mgpt_linalg.cpp
View Options

Event Timeline

mgpt_linalg.cppNo OneTemporaryActions

File Metadata

mgpt_linalg.cppView Options

Event Timeline

mgpt_linalg.cpp
No OneTemporary
Actions

mgpt_linalg.cpp
View Options