Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90374719
mgpt_linalg.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Nov 1, 02:04
Size
5 KB
Mime Type
text/x-c
Expires
Sun, Nov 3, 02:04 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
21814652
Attached To
rLAMMPS lammps
mgpt_linalg.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
This file is part of the MGPT implementation. See further comments
in pair_mgpt.cpp and pair_mgpt.h.
------------------------------------------------------------------------- */
#include "mgpt_linalg.h"
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <cassert>
#define restrict __restrict__
#ifdef IBM_BG_SIMD
#include <builtins.h>
/* Double precision 440d (double hummer) matrix multiplication */
#define const
#include "mgpt_mmul_bg_552.c.h"
#include "mgpt_mmul_bg_722.c.h"
#include "mgpt_bgmul_7.c.h"
/* Double precision 440d (double hummer) product trace */
#define real double
#include "mgpt_ttr_5123.c.h"
#include "mgpt_ttr_7123.c.h"
#undef real
#undef const
#endif
#ifdef IBM_BGQ_SIMD
/* Double precision QPX matrix multiplication */
#include "mgpt_mmul_bgq_n5_lda8_2x8.c.h"
#include "mgpt_mmul_bgq_n7_lda8_4x8.c.h"
/* Double precision QPX product trace */
#include "mgpt_ttr_5141.c.h"
#include "mgpt_ttr_7141.c.h"
#endif
#ifdef x86_SIMD
/* Double precision SSE2 matrix multiplication */
#include "mgpt_mmul3d_526.c.h"
#include "mgpt_mmul3d_744.c.h"
/* Single precision SSE2 matrix multiplication */
#include "mgpt_mmul3_538.c.h"
#include "mgpt_mmul3_748.c.h"
/* Double precision SSE3 product trace */
#define real double
#include "mgpt_ttr_5022.c.h"
#include "mgpt_ttr_7022.c.h"
#undef real
/* Single precision SSE3 product trace */
#define real float
#include "mgpt_ttr_5042.c.h"
#include "mgpt_ttr_7042.c.h"
#undef real
#endif
#if defined(IBM_BG_SIMD) || defined(IBM_BGQ_SIMD)
#define const
#endif
static void transprod_generic(const double * restrict A,
const double * restrict B,
double * restrict C) {
const int lda = 8,n = mgpt_linalg::matrix_size;
int i,j,k;
double s;
for(i = 0; i<n; i++)
for(j = 0; j<n; j++) {
s = 0.0;
for(k = 1; k<=n; k++)
s = s + A[i*lda+k]*B[j*lda+k];
C[i*lda+(j+1)] = s;
}
}
static void transtrace3_generic(const double * restrict A,
const double * restrict B0,double * restrict tout0,
const double * restrict B1,double * restrict tout1,
const double * restrict B2,double * restrict tout2) {
const int lda = 8,n = mgpt_linalg::matrix_size;
double t0 = 0.0,t1 = 0.0,t2 = 0.0;
int i,j;
for(i = 0; i<n; i++)
for(j = 1; j<=n; j++) {
int idx = i*lda + j;
double atmp = A[idx];
t0 = t0 + atmp*B0[idx];
t1 = t1 + atmp*B1[idx];
t2 = t2 + atmp*B2[idx];
}
*tout0 = t0;
*tout1 = t1;
*tout2 = t2;
}
static void transprod_error(const double * restrict A,
const double * restrict B,
double * restrict C) {
printf("Linear algebra subroutines not initialized (transprod).\n");
exit(1);
}
static void transtrace3_error(const double * restrict A,
const double * restrict B0,double * restrict tout0,
const double * restrict B1,double * restrict tout1,
const double * restrict B2,double * restrict tout2) {
printf("Linear algebra subroutines not initialized (transtrace3).\n");
exit(1);
}
#if defined(IBM_BG_SIMD) || defined(IBM_BGQ_SIMD)
#undef const
#endif
#undef restrict
int mgpt_linalg::matrix_size;
mgpt_linalg::mgpt_linalg() {
mgpt_linalg::matrix_size = 0;
tr_mul = transprod_error;
tr_trace = transtrace3_error;
single = 0;
}
mgpt_linalg::mgpt_linalg(int n,int single_precision) {
mgpt_linalg::matrix_size = n;
tr_mul = transprod_generic;
tr_trace = transtrace3_generic;
single = 0;
msg = "@@@ Choosing generic (unoptimized) linear algebra routines.\n";
#ifdef IBM_BG_SIMD
msg = "@@@ Choosing BG/L optimized linear algebra routines.\n";
if(n == 5) {
tr_mul = mmul_bg_5_8_5x2v2;
tr_trace = ttr_bg_5_8_3_v2r3;
} else if(n == 7) {
//tr_mul = mmul_bg_7_8_2x2v2;
tr_mul = (trmul_fun) bgmul_7;
tr_trace = ttr_bg_7_8_3_v2r3;
}
#elif defined(IBM_BGQ_SIMD)
msg = "@@@ Choosing BG/Q optimized linear algebra routines.\n";
if(1) {
if(n == 5) {
tr_mul = mmul_bgq_n5_lda8_2x8;
tr_trace = ttr_bg_5_8_3_v4r1;
} else if(n == 7) {
tr_mul = mmul_bgq_n7_lda8_4x8;
tr_trace = ttr_bg_7_8_3_v4r1;
}
}
#elif defined(x86_SIMD)
if(single_precision) {
msg = "@@@ Choosing Intel/AMD single precision linear algebra routines.\n";
if(n == 5) {
tr_mul = (trmul_fun) mmul3_5_8_3x8v4;
tr_trace = (trtrace3_fun) ttr_5_8_3_v4r2;
single = 1;
} else if(n == 7) {
tr_mul = (trmul_fun) mmul3_7_8_4x8v4;
tr_trace = (trtrace3_fun) ttr_7_8_3_v4r2;
single = 1;
}
} else {
msg = "@@@ Choosing Intel/AMD double precision linear algebra routines.\n";
if(n == 5) {
tr_mul = mmul3_5_8_2x6v2;
tr_trace = ttr_5_8_3_v2r2;
} else if(n == 7) {
tr_mul = mmul3_7_8_4x4v2;
tr_trace = ttr_7_8_3_v2r2;
}
}
#endif
}
Event Timeline
Log In to Comment